Tianshou/docs/refs.bib

% Nature article on mastering the game of Go without human knowledge.
% {Go} is braced so sentence-casing styles keep the game's name capitalised.
@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of {Go} without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature24270}
}

% Conference paper on asynchronous methods for deep reinforcement learning.
% The venue is spelled out in full, matching the other conference entries
% in this file that already use the unabbreviated name.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016}
}

% Conference paper on continuous control with deep reinforcement learning.
% NOTE(review): the key says 2015 while the year field says 2016; presumably
% the key reflects an earlier preprint. The key is kept unchanged so existing
% citations keep resolving.
@inproceedings{lillicrap2015continuous,
  author    = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title     = {Continuous Control with Deep Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2016}
}

% Nature article on human-level control through deep reinforcement learning.
% The pages field holds the full range rather than only the first page.
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature14236}
}

% arXiv preprint on proximal policy optimization algorithms.
% The journal string is kept as-is so styles that ignore eprint fields render
% the same text as before; eprint/archiveprefix expose the identifier to
% styles and tools that understand them.
@article{schulman2017proximal,
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title         = {Proximal policy optimization algorithms},
  journal       = {arXiv preprint arXiv:1707.06347},
  year          = {2017},
  eprint        = {1707.06347},
  archiveprefix = {arXiv}
}

% Conference paper on trust region policy optimization.
% The venue is spelled out in full, matching the other conference entries
% in this file that already use the unabbreviated name.
@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
  title     = {Trust region policy optimization},
  booktitle = {International Conference on Machine Learning},
  pages     = {1889--1897},
  year      = {2015}
}


% arXiv preprint on auto-encoding variational Bayes.
% {Bayes} is braced so sentence-casing styles do not lowercase the surname.
% The journal string is kept so existing output is unchanged; the eprint
% fields carry the same identifier in machine-readable form.
@article{kingma2013auto,
  author        = {Kingma, Diederik P and Welling, Max},
  title         = {Auto-encoding Variational {Bayes}},
  journal       = {arXiv preprint arXiv:1312.6114},
  year          = {2013},
  eprint        = {1312.6114},
  archiveprefix = {arXiv}
}

% arXiv preprint introducing the Adam stochastic optimization method.
% The first author is written with the middle initial, the same way as in the
% kingma2013auto entry, so both entries name the author identically.
@article{kingma2014adam,
  author        = {Kingma, Diederik P and Ba, Jimmy},
  title         = {{Adam}: A method for stochastic optimization},
  journal       = {arXiv preprint arXiv:1412.6980},
  year          = {2014},
  eprint        = {1412.6980},
  archiveprefix = {arXiv}
}

% Journal article on latent Dirichlet allocation.
% {Dirichlet} is braced so the surname survives sentence-casing styles.
% "Jan" is a month, not an issue number, so it is stored through the
% predefined month macro instead of the number field.
@article{blei2003latent,
  author  = {Blei, David M and Ng, Andrew Y and Jordan, Michael I},
  title   = {Latent {Dirichlet} allocation},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  month   = jan,
  pages   = {993--1022},
  year    = {2003}
}

% arXiv preprint on autoencoding variational inference for topic models.
% The journal string is kept so existing output is unchanged; the eprint
% fields carry the same identifier in machine-readable form.
@article{srivastava2017autoencoding,
  author        = {Srivastava, Akash and Sutton, Charles},
  title         = {Autoencoding variational inference for topic models},
  journal       = {arXiv preprint arXiv:1703.01488},
  year          = {2017},
  eprint        = {1703.01488},
  archiveprefix = {arXiv}
}

% Conference paper on neural variational inference for text processing.
@inproceedings{miao2016neural,
  author    = {Miao, Yishu and Yu, Lei and Blunsom, Phil},
  title     = {Neural variational inference for text processing},
  booktitle = {International Conference on Machine Learning},
  year      = {2016},
  pages     = {1727--1736}
}

% Handbook chapter, so it is typed as a contribution to a collection with the
% handbook title in booktitle rather than as a journal article.
% The former volume/number values (2 and 11) did not describe a journal issue
% and are omitted. The "and others" marker is dropped from the author list.
% NOTE(review): editors, chapter number and page range were filled in from
% the handbook's published front matter; confirm against the printed volume.
@incollection{neal2011mcmc,
  author    = {Neal, Radford M},
  title     = {{MCMC} using {Hamiltonian} dynamics},
  booktitle = {Handbook of Markov Chain Monte Carlo},
  editor    = {Brooks, Steve and Gelman, Andrew and Jones, Galin L and Meng, Xiao-Li},
  chapter   = {5},
  pages     = {113--162},
  publisher = {CRC Press},
  address   = {New York, NY},
  year      = {2011}
}

% Journal article on annealed importance sampling.
@article{neal2001annealed,
  author    = {Neal, Radford M},
  title     = {Annealed importance sampling},
  journal   = {Statistics and computing},
  publisher = {Springer},
  year      = {2001},
  volume    = {11},
  number    = {2},
  pages     = {125--139}
}
% History note: first master version 2018-04-16 18:02:00 +08:00.
% A backtick-quoted copy of the thirteen entries defined earlier in this file
% used to follow this line. It declared the same citation keys a second time
% (silver2017mastering through neal2001annealed) and had stray backticks fused
% into the entry syntax, so bibliography tools reported repeated entries and
% parse errors. The copy is dropped; the entries earlier in the file remain
% the single authoritative definition of every key.