import atexit import os import sys import cloudpickle import gym import numpy as np ###from tf dreamerv2 code class MemoryMaze: def __init__(self, env, obs_key='image', act_key='action', size=(64, 64)): self._env = env self._obs_is_dict = hasattr(self._env.observation_space, 'spaces') self._act_is_dict = hasattr(self._env.action_space, 'spaces') self._obs_key = obs_key self._act_key = act_key self._size = size self._gray = False def __getattr__(self, name): if name.startswith('__'): raise AttributeError(name) try: return getattr(self._env, name) except AttributeError: raise ValueError(name) @property def obs_space(self): if self._obs_is_dict: spaces = self._env.observation_space.spaces.copy() else: spaces = {self._obs_key: self._env.observation_space} return { **spaces, 'reward': gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32), 'is_first': gym.spaces.Box(0, 1, (), dtype=np.bool), 'is_last': gym.spaces.Box(0, 1, (), dtype=np.bool), 'is_terminal': gym.spaces.Box(0, 1, (), dtype=np.bool), } @property def act_space(self): if self._act_is_dict: return self._env.action_space.spaces.copy() else: return {self._act_key: self._env.action_space} @property def observation_space(self): img_shape = self._size + ((1,) if self._gray else (3,)) return gym.spaces.Dict( { "image": gym.spaces.Box(0, 255, img_shape, np.uint8), } ) @property def action_space(self): space = self._env.action_space space.discrete = True return space def step(self, action): # if not self._act_is_dict: # action = action[self._act_key] obs, reward, done, info = self._env.step(action) if not self._obs_is_dict: obs = {self._obs_key: obs} # obs['reward'] = float(reward) obs['is_first'] = False obs['is_last'] = done obs['is_terminal'] = info.get('is_terminal', False) return obs, reward, done, info def reset(self): obs = self._env.reset() if not self._obs_is_dict: obs = {self._obs_key: obs} obs['reward'] = 0.0 obs['is_first'] = True obs['is_last'] = False obs['is_terminal'] = False return obs