- Fixes an inconsistency in the implementation of Discrete CRR: it now uses the `Critic` class for its critic, following the convention of other actor-critic policies; - Updates several offline policies to use the `ActorCritic` class for their optimizers, eliminating randomness caused by parameter sharing between actor and critic; - Adds `writer.flush()` in TensorboardLogger to ensure results are written in real time; - Allows `test_collector=None` in 3 trainers to turn off testing during training; - Updates the Atari offline results in README.md; - Moves the Atari offline RL examples to `examples/offline` and the tests to `test/offline`, per review comments.
		
			
				
	
	
		
			13 lines
		
	
	
		
			190 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			13 lines
		
	
	
		
			190 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| from tianshou import data, env, exploration, policy, trainer, utils  # bind the six subpackages as names in this module
 | |
| 
 | |
| __version__ = "0.4.5"  # version string exposed by this module
 | |
| 
 | |
| __all__ = [  # names exported by a star-import of this module
 | |
|     "env",
 | |
|     "data",
 | |
|     "utils",
 | |
|     "policy",
 | |
|     "trainer",
 | |
|     "exploration",
 | |
| ]
 |