Tianshou/docs/refs.bib

111 lines
3.4 KiB
BibTeX
Raw Permalink Normal View History

2018-04-16 18:02:00 +08:00
% Nature article. Full page range and DOI recorded; "Go" is brace-protected
% so sentence-casing styles keep the capital.
@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of {Go} without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature24270},
}
% Conference paper. Venue spelled out to match the other proceedings entries
% in this file; accents restored in the second author's given names.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adri{\`a} Puigdom{\`e}nech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016},
}
% Conference paper. The citation key keeps its 2015 stem so existing
% citations still resolve, although the year field is 2016.
@inproceedings{lillicrap2015continuous,
  author    = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title     = {Continuous Control with Deep Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2016},
}
% Nature article. Full page range and DOI recorded instead of the first page only.
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature14236},
}
% arXiv preprint; the identifier is carried in the journal field.
@article{schulman2017proximal,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  journal = {arXiv preprint arXiv:1707.06347},
  year    = {2017},
}
% Conference paper. Venue spelled out to match the other proceedings entries
% in this file; page range added.
@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
  title     = {Trust region policy optimization},
  booktitle = {International Conference on Machine Learning},
  pages     = {1889--1897},
  year      = {2015},
}
% arXiv preprint. "Bayes" is brace-protected so sentence-casing styles
% do not lowercase the proper noun.
@article{kingma2013auto,
  author  = {Kingma, Diederik P and Welling, Max},
  title   = {Auto-encoding Variational {Bayes}},
  journal = {arXiv preprint arXiv:1312.6114},
  year    = {2013},
}
% arXiv preprint. First author written with the middle initial, matching
% the spelling in kingma2013auto so both entries name the author identically.
@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {{Adam}: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
}
% Journal article. "Jan" was stored in the issue-number field; it is a month
% and is now given with the standard unquoted month macro.
@article{blei2003latent,
  author  = {Blei, David M and Ng, Andrew Y and Jordan, Michael I},
  title   = {Latent {Dirichlet} allocation},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  month   = jan,
  pages   = {993--1022},
  year    = {2003},
}
% arXiv preprint; the identifier is carried in the journal field.
@article{srivastava2017autoencoding,
  author  = {Srivastava, Akash and Sutton, Charles},
  title   = {Autoencoding variational inference for topic models},
  journal = {arXiv preprint arXiv:1703.01488},
  year    = {2017},
}
% Conference paper with the venue given by its full name.
@inproceedings{miao2016neural,
  author    = {Miao, Yishu and Yu, Lei and Blunsom, Phil},
  title     = {Neural variational inference for text processing},
  booktitle = {International Conference on Machine Learning},
  pages     = {1727--1736},
  year      = {2016},
}
% Chapter in an edited handbook, so it is typed as incollection with the
% book title in booktitle. The earlier volume/number values and the
% "and others" author suffix were export artefacts and are removed.
@incollection{neal2011mcmc,
  author    = {Neal, Radford M},
  title     = {{MCMC} using {Hamiltonian} dynamics},
  booktitle = {Handbook of {Markov} Chain {Monte} {Carlo}},
  editor    = {Brooks, Steve and Gelman, Andrew and Jones, Galin L and Meng, Xiao-Li},
  pages     = {113--162},
  publisher = {CRC Press},
  address   = {New York, NY},
  year      = {2011},
}
% Journal article. Journal name given with its proper capitalisation.
@article{neal2001annealed,
  author    = {Neal, Radford M},
  title     = {Annealed importance sampling},
  journal   = {Statistics and Computing},
  volume    = {11},
  number    = {2},
  pages     = {125--139},
  year      = {2001},
  publisher = {Springer},
}