applied formatter to envs

NM512 2023-04-23 22:52:30 +09:00
parent 628b856c63
commit 6f0e6c6963
4 changed files with 417 additions and 400 deletions
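The reformatting pattern in the diff below (double quotes, magic trailing commas, long signatures exploded to one argument per line) matches black's default style. Assuming black was the formatter used — the commit message does not name the tool — a minimal sketch of the same transformation via black's Python API; the input string is a hypothetical signature modeled on the Atari wrapper, not taken from the repository:

import black

# Hypothetical input mimicking the old-style __init__ signature (single quotes,
# arguments packed onto two continuation lines).
src = (
    "def __init__(self, name, action_repeat=4, size=(84, 84), gray=True, noops=0, "
    "lives='unused', sticky=True, actions='all', length=108000, seed=None): pass\n"
)

# format_str applies the normalizations visible in this commit: single quotes become
# double quotes, and the over-long signature is split to one argument per line with
# a trailing comma.
print(black.format_str(src, mode=black.Mode()))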

View File

@@ -2,30 +2,44 @@ import numpy as np


 class Atari:
-
     LOCK = None

     def __init__(
-        self, name, action_repeat=4, size=(84, 84), gray=True, noops=0, lives='unused',
-        sticky=True, actions='all', length=108000, resize='opencv', seed=None):
+        self,
+        name,
+        action_repeat=4,
+        size=(84, 84),
+        gray=True,
+        noops=0,
+        lives="unused",
+        sticky=True,
+        actions="all",
+        length=108000,
+        resize="opencv",
+        seed=None,
+    ):
         assert size[0] == size[1]
-        assert lives in ('unused', 'discount', 'reset'), lives
-        assert actions in ('all', 'needed'), actions
-        assert resize in ('opencv', 'pillow'), resize
+        assert lives in ("unused", "discount", "reset"), lives
+        assert actions in ("all", "needed"), actions
+        assert resize in ("opencv", "pillow"), resize
         if self.LOCK is None:
             import multiprocessing as mp
-            mp = mp.get_context('spawn')
+
+            mp = mp.get_context("spawn")
             self.LOCK = mp.Lock()
         self._resize = resize
-        if self._resize == 'opencv':
+        if self._resize == "opencv":
             import cv2
+
             self._cv2 = cv2
-        if self._resize == 'pillow':
+        if self._resize == "pillow":
             from PIL import Image
+
             self._image = Image
         import gym.envs.atari
-        if name == 'james_bond':
-            name = 'jamesbond'
+
+        if name == "james_bond":
+            name = "jamesbond"
         self._repeat = action_repeat
         self._size = size
         self._gray = gray
@@ -37,10 +51,12 @@ class Atari:
         with self.LOCK:
             self._env = gym.envs.atari.AtariEnv(
                 game=name,
-                obs_type='image',
-                frameskip=1, repeat_action_probability=0.25 if sticky else 0.0,
-                full_action_space=(actions == 'all'))
-        assert self._env.unwrapped.get_action_meanings()[0] == 'NOOP'
+                obs_type="image",
+                frameskip=1,
+                repeat_action_probability=0.25 if sticky else 0.0,
+                full_action_space=(actions == "all"),
+            )
+        assert self._env.unwrapped.get_action_meanings()[0] == "NOOP"
         shape = self._env.observation_space.shape
         self._buffer = [np.zeros(shape, np.uint8) for _ in range(2)]
         self._ale = self._env.unwrapped.ale
@@ -73,7 +89,7 @@ class Atari:
                 self._screen(self._buffer[1])
             if over:
                 break
-            if self._lives != 'unused':
+            if self._lives != "unused":
                 current = self._ale.lives()
                 if current < self._last_lives:
                     dead = True
@@ -85,8 +101,9 @@ class Atari:
         self._done = over or (self._length and self._step >= self._length) or dead
         return self._obs(
             total,
-            is_last=self._done or (dead and self._lives == 'reset'),
-            is_terminal=dead or over)
+            is_last=self._done or (dead and self._lives == "reset"),
+            is_terminal=dead or over,
+        )

     def reset(self):
         self._env.reset()
@@ -108,10 +125,11 @@ class Atari:
         np.maximum(self._buffer[0], self._buffer[1], out=self._buffer[0])
         image = self._buffer[0]
         if image.shape[:2] != self._size:
-            if self._resize == 'opencv':
+            if self._resize == "opencv":
                 image = self._cv2.resize(
-                    image, self._size, interpolation=self._cv2.INTER_AREA)
-            if self._resize == 'pillow':
+                    image, self._size, interpolation=self._cv2.INTER_AREA
+                )
+            if self._resize == "pillow":
                 image = self._image.fromarray(image)
                 image = image.resize(self._size, self._image.NEAREST)
                 image = np.array(image)
@@ -119,7 +137,7 @@ class Atari:
             weights = [0.299, 0.587, 1 - (0.299 + 0.587)]
             image = np.tensordot(image, weights, (-1, 0)).astype(image.dtype)
             image = image[:, :, None]
-        return {'image':image, 'is_terminal':is_terminal}, reward, is_last, {}
+        return {"image": image, "is_terminal": is_terminal}, reward, is_last, {}

     def _screen(self, array):
         self._ale.getScreenRGB2(array)

View File

@@ -3,13 +3,13 @@ import numpy as np


 class DeepMindControl:
-
     def __init__(self, name, action_repeat=1, size=(64, 64), camera=None):
-        domain, task = name.split('_', 1)
-        if domain == 'cup':  # Only domain with multiple words.
-            domain = 'ball_in_cup'
+        domain, task = name.split("_", 1)
+        if domain == "cup":  # Only domain with multiple words.
+            domain = "ball_in_cup"
         if isinstance(domain, str):
             from dm_control import suite
+
             self._env = suite.load(domain, task)
         else:
             assert task is None
@@ -24,10 +24,8 @@ class DeepMindControl:
     def observation_space(self):
         spaces = {}
         for key, value in self._env.observation_spec().items():
-            spaces[key] = gym.spaces.Box(
-                -np.inf, np.inf, value.shape, dtype=np.float32)
-        spaces['image'] = gym.spaces.Box(
-            0, 255, self._size + (3,), dtype=np.uint8)
+            spaces[key] = gym.spaces.Box(-np.inf, np.inf, value.shape, dtype=np.float32)
+        spaces["image"] = gym.spaces.Box(0, 255, self._size + (3,), dtype=np.uint8)
         return gym.spaces.Dict(spaces)

     @property
@@ -44,21 +42,21 @@ class DeepMindControl:
             if time_step.last():
                 break
         obs = dict(time_step.observation)
-        obs['image'] = self.render()
+        obs["image"] = self.render()
         # There is no terminal state in DMC
-        obs['is_terminal'] = False
+        obs["is_terminal"] = False
         done = time_step.last()
-        info = {'discount': np.array(time_step.discount, np.float32)}
+        info = {"discount": np.array(time_step.discount, np.float32)}
         return obs, reward, done, info

     def reset(self):
         time_step = self._env.reset()
         obs = dict(time_step.observation)
-        obs['image'] = self.render()
-        obs['is_terminal'] = False
+        obs["image"] = self.render()
+        obs["is_terminal"] = False
         return obs

     def render(self, *args, **kwargs):
-        if kwargs.get('mode', 'rgb_array') != 'rgb_array':
+        if kwargs.get("mode", "rgb_array") != "rgb_array":
             raise ValueError("Only render mode 'rgb_array' is supported.")
         return self._env.physics.render(*self._size, camera_id=self._camera)

View File

@@ -4,7 +4,6 @@ import deepmind_lab


 class DeepMindLabyrinth(object):
-
     ACTION_SET_DEFAULT = (
         (0, 0, 0, 1, 0, 0, 0),  # Forward
         (0, 0, 0, -1, 0, 0, 0),  # Backward
@@ -34,36 +33,44 @@ class DeepMindLabyrinth(object):
     )

     def __init__(
-        self, level, mode, action_repeat=4, render_size=(64, 64),
-        action_set=ACTION_SET_DEFAULT, level_cache=None, seed=None,
-        runfiles_path=None):
-        assert mode in ('train', 'test')
+        self,
+        level,
+        mode,
+        action_repeat=4,
+        render_size=(64, 64),
+        action_set=ACTION_SET_DEFAULT,
+        level_cache=None,
+        seed=None,
+        runfiles_path=None,
+    ):
+        assert mode in ("train", "test")
         if runfiles_path:
-            print('Setting DMLab runfiles path:', runfiles_path)
+            print("Setting DMLab runfiles path:", runfiles_path)
             deepmind_lab.set_runfiles_path(runfiles_path)
         self._config = {}
-        self._config['width'] = render_size[0]
-        self._config['height'] = render_size[1]
-        self._config['logLevel'] = 'WARN'
-        if mode == 'test':
-            self._config['allowHoldOutLevels'] = 'true'
-            self._config['mixerSeed'] = 0x600D5EED
+        self._config["width"] = render_size[0]
+        self._config["height"] = render_size[1]
+        self._config["logLevel"] = "WARN"
+        if mode == "test":
+            self._config["allowHoldOutLevels"] = "true"
+            self._config["mixerSeed"] = 0x600D5EED
         self._action_repeat = action_repeat
         self._random = np.random.RandomState(seed)
         self._env = deepmind_lab.Lab(
-            level='contributed/dmlab30/'+level,
-            observations=['RGB_INTERLEAVED'],
+            level="contributed/dmlab30/" + level,
+            observations=["RGB_INTERLEAVED"],
             config={k: str(v) for k, v in self._config.items()},
-            level_cache=level_cache)
+            level_cache=level_cache,
+        )
         self._action_set = action_set
         self._last_image = None
         self._done = True

     @property
     def observation_space(self):
-        shape = (self._config['height'], self._config['width'], 3)
+        shape = (self._config["height"], self._config["width"], 3)
         space = gym.spaces.Box(low=0, high=255, shape=shape, dtype=np.uint8)
-        return gym.spaces.Dict({'image': space})
+        return gym.spaces.Dict({"image": space})

     @property
     def action_space(self):
@@ -83,7 +90,7 @@ class DeepMindLabyrinth(object):
         return obs, reward, self._done, {}

     def render(self, *args, **kwargs):
-        if kwargs.get('mode', 'rgb_array') != 'rgb_array':
+        if kwargs.get("mode", "rgb_array") != "rgb_array":
             raise ValueError("Only render mode 'rgb_array' is supported.")
         del args  # Unused
         del kwargs  # Unused
@@ -96,6 +103,6 @@ class DeepMindLabyrinth(object):
         if self._done:
             image = 0 * self._last_image
         else:
-            image = self._env.observations()['RGB_INTERLEAVED']
+            image = self._env.observations()["RGB_INTERLEAVED"]
         self._last_image = image
-        return {'image': image}
+        return {"image": image}

View File

@@ -3,7 +3,6 @@ import numpy as np


 class CollectDataset:
-
     def __init__(self, env, callbacks=None, precision=32):
         self._env = env
         self._callbacks = callbacks or ()
@@ -20,9 +19,9 @@ class CollectDataset:
         if isinstance(action, dict):
             transition.update(action)
         else:
-            transition['action'] = action
-        transition['reward'] = reward
-        transition['discount'] = info.get('discount', np.array(1 - float(done)))
+            transition["action"] = action
+        transition["reward"] = reward
+        transition["discount"] = info.get("discount", np.array(1 - float(done)))
         self._episode.append(transition)
         if done:
             for key, value in self._episode[1].items():
@@ -30,7 +29,7 @@ class CollectDataset:
                     self._episode[0][key] = 0 * value
             episode = {k: [t[k] for t in self._episode] for k in self._episode[0]}
             episode = {k: self._convert(v) for k, v in episode.items()}
-            info['episode'] = episode
+            info["episode"] = episode
             for callback in self._callbacks:
                 callback(episode)
         return obs, reward, done, info
@@ -41,8 +40,8 @@ class CollectDataset:
         # Missing keys will be filled with a zeroed out version of the first
         # transition, because we do not know what action information the agent will
         # pass yet.
-        transition['reward'] = 0.0
-        transition['discount'] = 1.0
+        transition["reward"] = 0.0
+        transition["discount"] = 1.0
         self._episode = [transition]
         return obs
@@ -62,7 +61,6 @@ class CollectDataset:


 class TimeLimit:
-
     def __init__(self, env, duration):
         self._env = env
         self._duration = duration
@@ -72,13 +70,13 @@ class TimeLimit:
         return getattr(self._env, name)

     def step(self, action):
-        assert self._step is not None, 'Must reset environment.'
+        assert self._step is not None, "Must reset environment."
         obs, reward, done, info = self._env.step(action)
         self._step += 1
         if self._step >= self._duration:
             done = True
-            if 'discount' not in info:
-                info['discount'] = np.array(1.0).astype(np.float32)
+            if "discount" not in info:
+                info["discount"] = np.array(1.0).astype(np.float32)
             self._step = None
         return obs, reward, done, info
@@ -88,12 +86,11 @@ class TimeLimit:


 class NormalizeActions:
-
     def __init__(self, env):
         self._env = env
         self._mask = np.logical_and(
-            np.isfinite(env.action_space.low),
-            np.isfinite(env.action_space.high))
+            np.isfinite(env.action_space.low), np.isfinite(env.action_space.high)
+        )
         self._low = np.where(self._mask, env.action_space.low, -1)
         self._high = np.where(self._mask, env.action_space.high, 1)
@@ -113,7 +110,6 @@ class NormalizeActions:


 class OneHotAction:
-
     def __init__(self, env):
         assert isinstance(env.action_space, gym.spaces.Discrete)
         self._env = env
@@ -135,7 +131,7 @@ class OneHotAction:
         reference = np.zeros_like(action)
         reference[index] = 1
         if not np.allclose(reference, action):
-            raise ValueError(f'Invalid one-hot action:\n{action}')
+            raise ValueError(f"Invalid one-hot action:\n{action}")
         return self._env.step(index)

     def reset(self):
@@ -150,7 +146,6 @@ class OneHotAction:


 class RewardObs:
-
     def __init__(self, env):
         self._env = env
@@ -160,23 +155,22 @@ class RewardObs:
     @property
     def observation_space(self):
         spaces = self._env.observation_space.spaces
-        assert 'reward' not in spaces
-        spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32)
+        assert "reward" not in spaces
+        spaces["reward"] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32)
         return gym.spaces.Dict(spaces)

     def step(self, action):
         obs, reward, done, info = self._env.step(action)
-        obs['reward'] = reward
+        obs["reward"] = reward
         return obs, reward, done, info

     def reset(self):
         obs = self._env.reset()
-        obs['reward'] = 0.0
+        obs["reward"] = 0.0
         return obs


 class SelectAction:
-
     def __init__(self, env, key):
         self._env = env
         self._key = key