import gym import numpy as np import deepmind_lab class DeepMindLabyrinth(object): ACTION_SET_DEFAULT = ( (0, 0, 0, 1, 0, 0, 0), # Forward (0, 0, 0, -1, 0, 0, 0), # Backward (0, 0, -1, 0, 0, 0, 0), # Strafe Left (0, 0, 1, 0, 0, 0, 0), # Strafe Right (-20, 0, 0, 0, 0, 0, 0), # Look Left (20, 0, 0, 0, 0, 0, 0), # Look Right (-20, 0, 0, 1, 0, 0, 0), # Look Left + Forward (20, 0, 0, 1, 0, 0, 0), # Look Right + Forward (0, 0, 0, 0, 1, 0, 0), # Fire ) ACTION_SET_MEDIUM = ( (0, 0, 0, 1, 0, 0, 0), # Forward (0, 0, 0, -1, 0, 0, 0), # Backward (0, 0, -1, 0, 0, 0, 0), # Strafe Left (0, 0, 1, 0, 0, 0, 0), # Strafe Right (-20, 0, 0, 0, 0, 0, 0), # Look Left (20, 0, 0, 0, 0, 0, 0), # Look Right (0, 0, 0, 0, 0, 0, 0), # Idle. ) ACTION_SET_SMALL = ( (0, 0, 0, 1, 0, 0, 0), # Forward (-20, 0, 0, 0, 0, 0, 0), # Look Left (20, 0, 0, 0, 0, 0, 0), # Look Right ) def __init__( self, level, mode, action_repeat=4, render_size=(64, 64), action_set=ACTION_SET_DEFAULT, level_cache=None, seed=None, runfiles_path=None): assert mode in ('train', 'test') if runfiles_path: print('Setting DMLab runfiles path:', runfiles_path) deepmind_lab.set_runfiles_path(runfiles_path) self._config = {} self._config['width'] = render_size[0] self._config['height'] = render_size[1] self._config['logLevel'] = 'WARN' if mode == 'test': self._config['allowHoldOutLevels'] = 'true' self._config['mixerSeed'] = 0x600D5EED self._action_repeat = action_repeat self._random = np.random.RandomState(seed) self._env = deepmind_lab.Lab( level='contributed/dmlab30/'+level, observations=['RGB_INTERLEAVED'], config={k: str(v) for k, v in self._config.items()}, level_cache=level_cache) self._action_set = action_set self._last_image = None self._done = True @property def observation_space(self): shape = (self._config['height'], self._config['width'], 3) space = gym.spaces.Box(low=0, high=255, shape=shape, dtype=np.uint8) return gym.spaces.Dict({'image': space}) @property def action_space(self): return gym.spaces.Discrete(len(self._action_set)) def reset(self): self._done = False self._env.reset(seed=self._random.randint(0, 2 ** 31 - 1)) obs = self._get_obs() return obs def step(self, action): raw_action = np.array(self._action_set[action], np.intc) reward = self._env.step(raw_action, num_steps=self._action_repeat) self._done = not self._env.is_running() obs = self._get_obs() return obs, reward, self._done, {} def render(self, *args, **kwargs): if kwargs.get('mode', 'rgb_array') != 'rgb_array': raise ValueError("Only render mode 'rgb_array' is supported.") del args # Unused del kwargs # Unused return self._last_image def close(self): self._env.close() def _get_obs(self): if self._done: image = 0 * self._last_image else: image = self._env.observations()['RGB_INTERLEAVED'] self._last_image = image return {'image': image}