Tianshou/docs/refs.bib
2020-03-29 15:18:33 +08:00

70 lines
2.5 KiB
BibTeX

% DQN -- journal article in Nature, vol. 518, no. 7540 (2015).
% Record exported from dblp; the timestamp/biburl/bibsource fields are dblp
% bookkeeping.
@article{DQN,
  author    = {Volodymyr Mnih and
               Koray Kavukcuoglu and
               David Silver and
               Andrei A. Rusu and
               Joel Veness and
               Marc G. Bellemare and
               Alex Graves and
               Martin A. Riedmiller and
               Andreas Fidjeland and
               Georg Ostrovski and
               Stig Petersen and
               Charles Beattie and
               Amir Sadik and
               Ioannis Antonoglou and
               Helen King and
               Dharshan Kumaran and
               Daan Wierstra and
               Shane Legg and
               Demis Hassabis},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  doi       = {10.1038/nature14236},
  url       = {https://doi.org/10.1038/nature14236},
  timestamp = {Wed, 14 Nov 2018 10:30:43 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% DDPG -- conference paper, ICLR 2016; the linked copy is arXiv 1509.02971.
% Record exported from dblp; the timestamp/biburl/bibsource fields are dblp
% bookkeeping.
% The day range in booktitle uses "--" so it typesets as an en dash, and the
% arXiv id from the url is also given in archivePrefix/eprint so that
% eprint-aware styles can use it.
@inproceedings{DDPG,
  author        = {Timothy P. Lillicrap and
                   Jonathan J. Hunt and
                   Alexander Pritzel and
                   Nicolas Heess and
                   Tom Erez and
                   Yuval Tassa and
                   David Silver and
                   Daan Wierstra},
  title         = {Continuous control with deep reinforcement learning},
  booktitle     = {4th International Conference on Learning Representations, {ICLR} 2016,
                   San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings},
  year          = {2016},
  url           = {http://arxiv.org/abs/1509.02971},
  archivePrefix = {arXiv},
  eprint        = {1509.02971},
  timestamp     = {Thu, 25 Jul 2019 14:25:37 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% PPO -- arXiv preprint 1707.06347 (2017), recorded in the dblp style as a
% CoRR article with volume "abs/<arXiv id>".
% The timestamp/biburl/bibsource fields are dblp bookkeeping.
@article{PPO,
  author        = {John Schulman and
                   Filip Wolski and
                   Prafulla Dhariwal and
                   Alec Radford and
                   Oleg Klimov},
  title         = {Proximal Policy Optimization Algorithms},
  journal       = {CoRR},
  volume        = {abs/1707.06347},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1707.06347},
  url           = {http://arxiv.org/abs/1707.06347},
  timestamp     = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}