% Bibliography database: deep reinforcement learning, variational inference,
% topic models, and MCMC references.
%
% Conventions used in this file:
%   - one field per line, authors in "Last, First" form joined by " and ";
%   - titles in Title Case, with proper nouns and acronyms brace-protected so
%     sentence-casing styles cannot lowercase them;
%   - venue names spelled out in full;
%   - page ranges written with a double hyphen.

% ---------------------------------------------------------------------------
% Deep reinforcement learning
% ---------------------------------------------------------------------------

@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the Game of {Go} without Human Knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group}
}

@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adri{\`a} Puigdom{\`e}nech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy P. and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous Methods for Deep Reinforcement Learning},
  booktitle = {International Conference on Machine Learning},
  year      = {2016}
}

@inproceedings{lillicrap2015continuous,
  author    = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title     = {Continuous Control with Deep Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2016}
}

@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and others},
  title     = {Human-Level Control through Deep Reinforcement Learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group}
}

% The journal field of each arXiv preprint in this file is kept as exported so
% that classic styles still print the identifier; eprint and archiveprefix
% carry the same identifier in machine-readable form for eprint-aware styles.
@article{schulman2017proximal,
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title         = {Proximal Policy Optimization Algorithms},
  journal       = {arXiv preprint arXiv:1707.06347},
  year          = {2017},
  eprint        = {1707.06347},
  archiveprefix = {arXiv}
}

@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael I. and Moritz, Philipp},
  title     = {Trust Region Policy Optimization},
  booktitle = {International Conference on Machine Learning},
  year      = {2015}
}

% ---------------------------------------------------------------------------
% Variational inference and optimization
% ---------------------------------------------------------------------------

@article{kingma2013auto,
  author        = {Kingma, Diederik P. and Welling, Max},
  title         = {Auto-Encoding Variational {Bayes}},
  journal       = {arXiv preprint arXiv:1312.6114},
  year          = {2013},
  eprint        = {1312.6114},
  archiveprefix = {arXiv}
}

@article{kingma2014adam,
  author        = {Kingma, Diederik P. and Ba, Jimmy},
  title         = {{Adam}: A Method for Stochastic Optimization},
  journal       = {arXiv preprint arXiv:1412.6980},
  year          = {2014},
  eprint        = {1412.6980},
  archiveprefix = {arXiv}
}

% ---------------------------------------------------------------------------
% Topic models
% ---------------------------------------------------------------------------

% The exported record stored the month in the number field; it is a month,
% not an issue number, so it is recorded with the predefined month macro.
@article{blei2003latent,
  author  = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
  title   = {Latent {Dirichlet} Allocation},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {993--1022},
  month   = jan,
  year    = {2003}
}

@article{srivastava2017autoencoding,
  author        = {Srivastava, Akash and Sutton, Charles},
  title         = {Autoencoding Variational Inference for Topic Models},
  journal       = {arXiv preprint arXiv:1703.01488},
  year          = {2017},
  eprint        = {1703.01488},
  archiveprefix = {arXiv}
}

@inproceedings{miao2016neural,
  author    = {Miao, Yishu and Yu, Lei and Blunsom, Phil},
  title     = {Neural Variational Inference for Text Processing},
  booktitle = {International Conference on Machine Learning},
  pages     = {1727--1736},
  year      = {2016}
}

% ---------------------------------------------------------------------------
% MCMC and importance sampling
% ---------------------------------------------------------------------------

% Handbook chapter: recorded as a contribution to an edited book rather than
% as a journal article. The handbook title moves from journal to booktitle and
% the publisher string is split into publisher and address.
% NOTE(review): editor list and page range were added from the published
% handbook, not from the exported record -- confirm against the printed volume.
@incollection{neal2011mcmc,
  author    = {Neal, Radford M.},
  title     = {{MCMC} Using {Hamiltonian} Dynamics},
  booktitle = {Handbook of {Markov} Chain {Monte Carlo}},
  editor    = {Brooks, Steve and Gelman, Andrew and Jones, Galin L. and Meng, Xiao-Li},
  pages     = {113--162},
  publisher = {CRC Press},
  address   = {New York, NY},
  year      = {2011}
}

@article{neal2001annealed,
  author    = {Neal, Radford M.},
  title     = {Annealed Importance Sampling},
  journal   = {Statistics and Computing},
  volume    = {11},
  number    = {2},
  pages     = {125--139},
  year      = {2001},
  publisher = {Springer}
}