% Deep reinforcement-learning references. The biburl/bibsource fields show
% that all three records were exported from dblp. Text outside an entry is
% ignored by BibTeX, so these notes never reach the bibliography.

% DQN: journal article, Nature 518(7540), pp. 529--533, 2015.
@article{DQN,
  author    = {Volodymyr Mnih and
               Koray Kavukcuoglu and
               David Silver and
               Andrei A. Rusu and
               Joel Veness and
               Marc G. Bellemare and
               Alex Graves and
               Martin A. Riedmiller and
               Andreas Fidjeland and
               Georg Ostrovski and
               Stig Petersen and
               Charles Beattie and
               Amir Sadik and
               Ioannis Antonoglou and
               Helen King and
               Dharshan Kumaran and
               Daan Wierstra and
               Shane Legg and
               Demis Hassabis},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  url       = {https://doi.org/10.1038/nature14236},
  doi       = {10.1038/nature14236},
  timestamp = {Wed, 14 Nov 2018 10:30:43 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% DDPG: conference paper (ICLR 2016); the url field points at the arXiv copy.
@inproceedings{DDPG,
  author    = {Timothy P. Lillicrap and
               Jonathan J. Hunt and
               Alexander Pritzel and
               Nicolas Heess and
               Tom Erez and
               Yuval Tassa and
               David Silver and
               Daan Wierstra},
  title     = {Continuous control with deep reinforcement learning},
  booktitle = {4th International Conference on Learning Representations,
               {ICLR} 2016, San Juan, Puerto Rico, May 2-4, 2016,
               Conference Track Proceedings},
  year      = {2016},
  url       = {http://arxiv.org/abs/1509.02971},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% PPO: arXiv preprint, recorded with journal CoRR and the arXiv identifier
% repeated in the volume, url and eprint fields.
@article{PPO,
  author        = {John Schulman and
                   Filip Wolski and
                   Prafulla Dhariwal and
                   Alec Radford and
                   Oleg Klimov},
  title         = {Proximal Policy Optimization Algorithms},
  journal       = {CoRR},
  volume        = {abs/1707.06347},
  year          = {2017},
  url           = {http://arxiv.org/abs/1707.06347},
  archivePrefix = {arXiv},
  eprint        = {1707.06347},
  timestamp     = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}