diff --git a/configs.yaml b/configs.yaml index 72a8167..00446ae 100644 --- a/configs.yaml +++ b/configs.yaml @@ -80,7 +80,7 @@ defaults: ac_opt_eps: 1e-5 value_grad_clip: 100 actor_grad_clip: 100 - dataset_size: 0 + dataset_size: 1000000 oversample_ends: False slow_value_target: True slow_target_update: 1 diff --git a/dreamer.py b/dreamer.py index ba4bc17..25bf33d 100644 --- a/dreamer.py +++ b/dreamer.py @@ -252,14 +252,14 @@ class ProcessEpisodeWrap: cls.eval_lengths = [] cache.clear() - if mode == "train" and config.dataset_size: + if mode == "train": total = 0 for key, ep in reversed(sorted(cache.items(), key=lambda x: x[0])): - if total <= config.dataset_size - length: + if not config.dataset_size or total <= config.dataset_size - length: total += len(ep["reward"]) - 1 else: del cache[key] - logger.scalar("dataset_size", total + length) + logger.scalar("dataset_size", total) print(f"{mode.title()} episode has {length} steps and return {score:.1f}.") logger.scalar(f"{mode}_return", score) logger.scalar(f"{mode}_length", length)