% BibTeX bibliography file (70 lines, 2.5 KiB).

% Cited as DQN: Mnih et al., Nature 518(7540), pp. 529--533, 2015.
@article{DQN,
  author    = {Volodymyr Mnih and
               Koray Kavukcuoglu and
               David Silver and
               Andrei A. Rusu and
               Joel Veness and
               Marc G. Bellemare and
               Alex Graves and
               Martin A. Riedmiller and
               Andreas Fidjeland and
               Georg Ostrovski and
               Stig Petersen and
               Charles Beattie and
               Amir Sadik and
               Ioannis Antonoglou and
               Helen King and
               Dharshan Kumaran and
               Daan Wierstra and
               Shane Legg and
               Demis Hassabis},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  url       = {https://doi.org/10.1038/nature14236},
  doi       = {10.1038/nature14236},
  timestamp = {Wed, 14 Nov 2018 10:30:43 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Cited as DDPG: Lillicrap et al., ICLR 2016 (arXiv abs/1509.02971).
@inproceedings{DDPG,
  author    = {Timothy P. Lillicrap and
               Jonathan J. Hunt and
               Alexander Pritzel and
               Nicolas Heess and
               Tom Erez and
               Yuval Tassa and
               David Silver and
               Daan Wierstra},
  title     = {Continuous control with deep reinforcement learning},
  booktitle = {4th International Conference on Learning Representations, {ICLR} 2016,
               San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year      = {2016},
  url       = {http://arxiv.org/abs/1509.02971},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Cited as PPO: Schulman et al., CoRR abs/1707.06347 (arXiv), 2017.
@article{PPO,
  author    = {John Schulman and
               Filip Wolski and
               Prafulla Dhariwal and
               Alec Radford and
               Oleg Klimov},
  title     = {Proximal Policy Optimization Algorithms},
  journal   = {CoRR},
  volume    = {abs/1707.06347},
  year      = {2017},
  url       = {http://arxiv.org/abs/1707.06347},
  archivePrefix = {arXiv},
  eprint    = {1707.06347},
  timestamp = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}