From 1cf0149c10a63c88765bc8b397e6932f1bedfa96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=BE=B7=E7=A5=A5?= Date: Wed, 14 Jun 2023 20:22:17 +0800 Subject: [PATCH] env v0.13 --- configs.yaml | 8 ++++---- envs/memmazeEnv.py | 4 +++- envs/wrappers.py | 4 ++-- tools.py | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/configs.yaml b/configs.yaml index 7c246e1..65cf26e 100644 --- a/configs.yaml +++ b/configs.yaml @@ -25,7 +25,7 @@ defaults: action_repeat: 2 time_limit: 1000 grayscale: False - prefill: 250 #0 + prefill: 3500 eval_noise: 0.0 reward_EMA: True @@ -150,7 +150,7 @@ atari100k: mazed: task: "memmaze_9_9" - steps: 5e4 + steps: 1e6 action_repeat: 2 debug: @@ -162,12 +162,12 @@ debug: mazegym: #task: "memory_maze:MemoryMaze-9x9-v0" - steps: 5e4 + steps: 1e6 action_repeat: 2 mazedeepm: task: "memmaze_9_9" - steps: 5e4 + steps: 1e6 action_repeat: 2 diff --git a/envs/memmazeEnv.py b/envs/memmazeEnv.py index 37a835f..81914fb 100644 --- a/envs/memmazeEnv.py +++ b/envs/memmazeEnv.py @@ -11,12 +11,14 @@ import numpy as np class MZGymWrapper: - def __init__(self, env, obs_key='image', act_key='action'): + def __init__(self, env, obs_key='image', act_key='action', size=(64, 64)): self._env = env self._obs_is_dict = hasattr(self._env.observation_space, 'spaces') self._act_is_dict = hasattr(self._env.action_space, 'spaces') self._obs_key = obs_key self._act_key = act_key + self._size = size + self._gray = False def __getattr__(self, name): if name.startswith('__'): diff --git a/envs/wrappers.py b/envs/wrappers.py index 9769fc9..eacb173 100644 --- a/envs/wrappers.py +++ b/envs/wrappers.py @@ -155,8 +155,8 @@ class OneHotAction: index = np.argmax(action).astype(int) reference = np.zeros_like(action) reference[index] = 1 - if not np.allclose(reference, action): - raise ValueError(f"Invalid one-hot action:\n{action}") + # if not np.allclose(reference, action): + # raise ValueError(f"Invalid one-hot action:\n{action}") return self._env.step(index) def reset(self): diff --git a/tools.py b/tools.py index bc46903..752b786 100644 --- a/tools.py +++ b/tools.py @@ -127,7 +127,7 @@ def simulate(agent, envs, steps=0, episodes=0, state=None): # Initialize or unpack simulation state. if state is None: step, episode = 0, 0 - done = np.ones(len(envs), np.bool) + done = np.ones(len(envs), np.bool_) length = np.zeros(len(envs), np.int32) obs = [None] * len(envs) agent_state = None