% Silver et al., Nature 550(7676), 2017.
% "Go" is brace-protected so sentence-casing styles keep the capital;
% pages now give the full range instead of only the first page.
@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of {Go} without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature24270}
}

% Mnih et al., conference paper, 2016.
% Venue is spelled out to match the other ICML entry in this file; accents are
% written as BibTeX special characters so sorting and labels stay correct.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adri{\`a} Puigdom{\`e}nech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy P. and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016}
}

% Lillicrap et al., conference paper, 2016.
% Venue is spelled out in full; initials carry periods so styles that print
% full given names do not render a bare letter.
@inproceedings{lillicrap2015continuous,
  author    = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title     = {Continuous Control with Deep Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2016}
}

% Mnih et al., Nature 518(7540), 2015.
% Pages give the full range; initials carry periods; DOI added.
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature14236}
}

% Schulman et al., arXiv preprint, 2017.
% A preprint is not a journal article: the arXiv identifier lives in the
% eprint fields rather than in a fake journal name, so eprint-aware styles
% can format and link it.
@misc{schulman2017proximal,
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title         = {Proximal policy optimization algorithms},
  year          = {2017},
  eprint        = {1707.06347},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1707.06347}
}

% Schulman et al., conference paper, 2015.
% Venue is spelled out to match the other ICML entry in this file; the middle
% initial on Jordan matches the spelling used for the same author elsewhere here.
@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael I. and Moritz, Philipp},
  title     = {Trust region policy optimization},
  booktitle = {International Conference on Machine Learning},
  pages     = {1889--1897},
  year      = {2015}
}


% Kingma and Welling, arXiv preprint, 2013.
% The arXiv identifier is stored in the eprint fields instead of the journal
% field; "Bayes" is brace-protected against sentence-casing styles.
@misc{kingma2013auto,
  author        = {Kingma, Diederik P. and Welling, Max},
  title         = {Auto-encoding Variational {Bayes}},
  year          = {2013},
  eprint        = {1312.6114},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1312.6114}
}

% Kingma and Ba, arXiv preprint, 2014.
% The arXiv identifier is stored in the eprint fields instead of the journal
% field; the method name "Adam" is brace-protected; the first author is written
% with the middle initial so both Kingma entries name the author the same way.
@misc{kingma2014adam,
  author        = {Kingma, Diederik P. and Ba, Jimmy},
  title         = {{Adam}: A method for stochastic optimization},
  year          = {2014},
  eprint        = {1412.6980},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1412.6980}
}

% Blei, Ng and Jordan, journal article, volume 3, 2003.
% Journal name is properly capitalised; "Dirichlet" is brace-protected; the
% month that had been stored in the number field is now a month macro.
@article{blei2003latent,
  author  = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
  title   = {Latent {Dirichlet} allocation},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  month   = jan,
  pages   = {993--1022},
  year    = {2003}
}

% Srivastava and Sutton, arXiv preprint, 2017.
% The arXiv identifier is stored in the eprint fields instead of the journal field.
@misc{srivastava2017autoencoding,
  author        = {Srivastava, Akash and Sutton, Charles},
  title         = {Autoencoding variational inference for topic models},
  year          = {2017},
  eprint        = {1703.01488},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1703.01488}
}

% Miao, Yu and Blunsom, conference paper, 2016.
@inproceedings{miao2016neural,
  author    = {Miao, Yishu and Yu, Lei and Blunsom, Phil},
  title     = {Neural variational inference for text processing},
  booktitle = {International Conference on Machine Learning},
  year      = {2016},
  pages     = {1727--1736}
}

% Neal, chapter in an edited handbook, 2011.
% Typed as incollection because the work is a book chapter, not a journal
% article. The earlier volume/number values and the "and others" author were
% export artifacts and are dropped; the fused publisher string is split into
% publisher and address. Acronym and proper nouns are brace-protected.
@incollection{neal2011mcmc,
  author    = {Neal, Radford M.},
  title     = {{MCMC} using {Hamiltonian} dynamics},
  booktitle = {Handbook of {Markov} Chain {Monte Carlo}},
  editor    = {Brooks, Steve and Gelman, Andrew and Jones, Galin L. and Meng, Xiao-Li},
  pages     = {113--162},
  publisher = {CRC Press},
  address   = {New York, NY},
  year      = {2011}
}

% Neal, journal article, volume 11(2), 2001.
% Journal name is properly capitalised; the middle initial carries a period.
@article{neal2001annealed,
  author    = {Neal, Radford M.},
  title     = {Annealed importance sampling},
  journal   = {Statistics and Computing},
  volume    = {11},
  number    = {2},
  pages     = {125--139},
  year      = {2001},
  publisher = {Springer}
}