Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							e8b44bbaf4 
							
						 
					 
					
						
						
							
							move sac_mcc to examples (runtime too long)  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-22 21:39:00 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							6a2963bd64 
							
						 
					 
					
						
						
							
							fix   #85  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-22 17:11:26 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							a655334d00 
							
						 
					 
					
						
						
							
							change batch.append to batch.cat  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-20 22:23:12 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							aff0f9aee0 
							
						 
					 
					
						
						
							
							fix append batch over batch  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-20 22:03:22 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								youkaichao 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							268f9d0533 
							
						 
					 
					
						
						
							
							type signature correction ( #83 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-20 09:57:16 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							81e4a16ef2 
							
						 
					 
					
						
						
							
							fix a bug in re-index replay buffer ( fix   #82 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-17 16:37:51 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								danagi 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							c59ad40aef 
							
						 
					 
					
						
						
							
							Add auto alpha tuning and exploration noise for sac. ( #80 )  
						
						 
						
						... 
						
						
						
						Add class BaseNoise and GaussianNoise for the concept of exploration noise.
Add new test for sac tested in MountainCarContinuous-v0,
which should benefit from the two new features above. 
						
						
							
						
					 
					
						2020-06-16 22:17:28 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							263e490b76 
							
						 
					 
					
						
						
							
							fix   #79  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-16 16:54:16 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							5f2f05a570 
							
						 
					 
					
						
						
							
							fix   #40  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-13 17:06:08 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							3774258cc7 
							
						 
					 
					
						
						
							
							fix unittest  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-11 09:07:45 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							1a914336f7 
							
						 
					 
					
						
						
							
							add random action in collector ( fix   #78 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-11 08:57:37 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							397e92b0fc 
							
						 
					 
					
						
						
							
							fix   #77  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-10 12:06:56 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							f1951780ab 
							
						 
					 
					
						
						
							
							fix a bug of storing batch over batch data into buffer  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-09 18:46:14 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							b32b96cd3e 
							
						 
					 
					
						
						
							
							separate flake8 lint  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-09 10:33:48 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							513573ea82 
							
						 
					 
					
						
						
							
							add link  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-08 22:20:52 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							560116d0b2 
							
						 
					 
					
						
						
							
							cheat sheet  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-08 21:53:00 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							52be533d06 
							
						 
					 
					
						
						
							
							Enable getattr for SubprocVecEnv. ( #74 )  
						
						 
						
						... 
						
						
						
						* Enable getattr for SubprocVecEnv.
* Consistent API between VectorEnv and SubprocVecEnv.
* Avoid code duplication. Add unit tests.
* Add docstring.
* Test more branches.
* Fix UT.
Co-authored-by: Alexis Duburcq <alexis.duburcq@wandercraft.eu> 
						
						
							
						
					 
					
						2020-06-05 17:17:43 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							66be5641b6 
							
						 
					 
					
						
						
							
							Fix to_numpy. ( #73 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Alexis Duburcq <alexis.duburcq@wandercraft.eu> 
						
						
							
						
					 
					
						2020-06-04 22:32:05 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							7bf202f195 
							
						 
					 
					
						
						
							
							polish docs  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-03 17:04:26 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							dc451dfe88 
							
						 
					 
					
						
						
							
							nstep all ( fix   #51 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-03 13:59:47 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							ff81a18f42 
							
						 
					 
					
						
						
							
							compute_nstep_returns (item 2 of  #51 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-02 22:29:50 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							f818a2467b 
							
						 
					 
					
						
						
							
							zh_CN docs  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-02 08:51:14 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							5f2c5347df 
							
						 
					 
					
						
						
							
							v0.2.3  
						
						 
						
						
						
						
							
  v0.2.3
 
						
					 
					
						2020-06-01 09:37:30 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							ba1b3e54eb 
							
						 
					 
					
						
						
							
							fix   #69  
						
						 
						
						
						
						
							
						
					 
					
						2020-06-01 08:30:09 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							1fce527c77 
							
						 
					 
					
						
						
							
							Fix 'to_tensor' dtype/device forwarding for Batch over Batch. ( #68 )  
						
						 
						
						... 
						
						
						
						* Fix Batch to_torch method not updating dtype/device of already converted data.
* Fix dtype/device to be forwarded by to_tensor for Batch over Batch.
* Add Unit test to check to_torch dtype/device recursive forwarding.
* Batch UT check accessing data using both dict and class style.
* Fix utils to_tensor dtype/device forwarding. Add Unit tests.
* Fix UT.
Co-authored-by: Alexis Duburcq <alexis.duburcq@wandercraft.eu>
Co-authored-by: n+e <463003665@qq.com> 
						
						
							
						
					 
					
						2020-05-30 21:40:31 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							529a4cf44c 
							
						 
					 
					
						
						
							
							Add pickle support for Batch. Fix VectorEnv. ( #67 )  
						
						 
						
						... 
						
						
						
						* Fix vecenv.
* Add pickle support for Batch class.
* Add Batch pickle Unit Test.
* Fix lint.
* Swap Batch UT.
* Fix lint.
Co-authored-by: Alexis Duburcq <alexis.duburcq@wandercraft.eu> 
						
						
							
						
					 
					
						2020-05-30 21:29:33 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							dd3e2130bb 
							
						 
					 
					
						
						
							
							Infer the right dtype for replay buffers. ( #64 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-29 22:27:03 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							8af7196a9a 
							
						 
					 
					
						
						
							
							Robust conversion from/to numpy/pytorch ( #63 )  
						
						 
						
						... 
						
						
						
						* Enable to convert Batch data back to torch.
* Add torch converter to collector.
* Fix
* Move to_numpy/to_torch convert in dedicated utils.py.
* Use to_numpy/to_torch to convert arrays.
* fix lint
* fix
* Add unit test to check Batch from/to numpy.
* Fix Batch over Batch.
Co-authored-by: Alexis Duburcq <alexis.duburcq@wandercraft.eu> 
						
						
							
						
					 
					
						2020-05-29 20:45:21 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Alexis DUBURCQ 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							b5093ecb56 
							
						 
					 
					
						
						
							
							Minor refactor for Batch class. ( #61 )  
						
						 
						
						... 
						
						
						
						* Minor refactor for Batch class.
* Fix.
* Add back key sorting.
Co-authored-by: Alexis Duburcq <alexis.duburcq@wandercraft.eu> 
						
						
							
						
					 
					
						2020-05-29 17:56:46 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							be9ce44290 
							
						 
					 
					
						
						
							
							fix   #59  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-29 11:49:47 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							d2b2fa87c0 
							
						 
					 
					
						
						
							
							fix   #56  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-29 08:03:37 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							de556fd22d 
							
						 
					 
					
						
						
							
							item3 of  #51  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-27 11:02:23 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								magicly 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							6237cc0d52 
							
						 
					 
					
						
						
							
							fix dqn zero eps ( #52 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: liyan <liyan1@digisky.com> 
						
						
							
						
					 
					
						2020-05-21 11:35:41 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Imone 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							57bca16f94 
							
						 
					 
					
						
						
							
							Fix log_prob and PPO dual_clip ( #49 )  
						
						 
						
						... 
						
						
						
						* Added DiagGaussian to fix log_prob
* Disable PPO dual_clip 
						
						
							
						
					 
					
						2020-05-18 16:23:35 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							70122dc03d 
							
						 
					 
					
						
						
							
							orthogonal init with 0 bias  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-17 17:06:20 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							3271c92609 
							
						 
					 
					
						
						
							
							orthogonal init for ppo in test script  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-16 20:27:01 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							0eef0ca198 
							
						 
					 
					
						
						
							
							fix optional type syntax  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-16 20:08:32 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							3243484f8e 
							
						 
					 
					
						
						
							
							show stat in pytest  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-16 08:48:12 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							9b26137cd2 
							
						 
					 
					
						
						
							
							add type annotation  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-12 11:31:47 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							075825325e 
							
						 
					 
					
						
						
							
							add preprocess_fn ( #42 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-05 13:39:51 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							04b091d975 
							
						 
					 
					
						
						
							
							fix max-grad-norm err in a2c ( #46 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-05-04 12:33:04 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							c2a7caf806 
							
						 
					 
					
						
						
							
							add recurrent actor and critic  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-30 16:31:40 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							134f787e24 
							
						 
					 
					
						
						
							
							reserve 'policy' keyword in replay buffer  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-29 17:48:48 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							e58fc78546 
							
						 
					 
					
						
						
							
							build docs  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-29 14:16:38 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							bb2f833d0e 
							
						 
					 
					
						
						
							
							support Batch of Batch and fix bugs ( #38 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-29 12:14:53 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								nicoguertler 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							8f718d9b13 
							
						 
					 
					
						
						
							
							Fix log_prob in SAC ( #41 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-28 23:44:15 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							69e4b3d301 
							
						 
					 
					
						
						
							
							fix setup err on building docs  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-28 21:11:40 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							80d661907e 
							
						 
					 
					
						
						
							
							Multimodal obs ( #38 ,  #27 ,  #25 )  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-28 20:56:02 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							959955fa2a 
							
						 
					 
					
						
						
							
							fix historical issues  
						
						 
						
						
						
						
							
						
					 
					
						2020-04-26 16:13:51 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Trinkle23897 
							
						 
					 
					
						
						
						
						
							
						
						
							6b96f124ae 
							
						 
					 
					
						
						
							
							fix pdqn  
						
						 
						
						
						
						
							
  v0.2.2
 
						
					 
					
						2020-04-26 15:11:20 +08:00