Set default replay buffer size to 1M
This commit is contained in:
		
							parent
							
								
									57ac1c11d3
								
							
						
					
					
						commit
						cd935b7dd9
					
				@ -80,7 +80,7 @@ defaults:
 | 
			
		||||
  ac_opt_eps: 1e-5
 | 
			
		||||
  value_grad_clip: 100
 | 
			
		||||
  actor_grad_clip: 100
 | 
			
		||||
  dataset_size: 0
 | 
			
		||||
  dataset_size: 1000000
 | 
			
		||||
  oversample_ends: False
 | 
			
		||||
  slow_value_target: True
 | 
			
		||||
  slow_target_update: 1
 | 
			
		||||
 | 
			
		||||
@ -252,14 +252,14 @@ class ProcessEpisodeWrap:
 | 
			
		||||
                cls.eval_lengths = []
 | 
			
		||||
                cache.clear()
 | 
			
		||||
 | 
			
		||||
        if mode == "train" and config.dataset_size:
 | 
			
		||||
        if mode == "train":
 | 
			
		||||
            total = 0
 | 
			
		||||
            for key, ep in reversed(sorted(cache.items(), key=lambda x: x[0])):
 | 
			
		||||
                if total <= config.dataset_size - length:
 | 
			
		||||
                if not config.dataset_size or total <= config.dataset_size - length:
 | 
			
		||||
                    total += len(ep["reward"]) - 1
 | 
			
		||||
                else:
 | 
			
		||||
                    del cache[key]
 | 
			
		||||
            logger.scalar("dataset_size", total + length)
 | 
			
		||||
            logger.scalar("dataset_size", total)
 | 
			
		||||
        print(f"{mode.title()} episode has {length} steps and return {score:.1f}.")
 | 
			
		||||
        logger.scalar(f"{mode}_return", score)
 | 
			
		||||
        logger.scalar(f"{mode}_length", length)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user