Tianshou/tianshou/env/atari.py

import cv2
import gym
import numpy as np
from gym.spaces.box import Box


def create_atari_environment(name=None, sticky_actions=True,
                             max_episode_steps=2000):
    # 'v0' ROMs use sticky actions (each action repeats with prob. 0.25);
    # 'v4' ROMs are deterministic.
    game_version = 'v0' if sticky_actions else 'v4'
    name = '{}NoFrameskip-{}'.format(name, game_version)
    env = gym.make(name)
    # Strip gym's TimeLimit wrapper; the step cap is enforced by
    # preprocessing via max_episode_steps instead.
    env = env.env
    env = preprocessing(env, max_episode_steps=max_episode_steps)
    return env
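
# Illustrative example (not part of the original module):
# create_atari_environment('Pong') builds 'PongNoFrameskip-v0' (sticky
# actions), while sticky_actions=False would build 'PongNoFrameskip-v4'.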


class preprocessing(object):
    """Standard Atari preprocessing: grayscale frames, max-pooling over the
    last two raw frames, resizing to size x size, frame stacking, optional
    terminal-on-life-loss, and a hard cap on episode length."""

    def __init__(self, env, frame_skip=4, terminal_on_life_loss=False,
                 size=84, max_episode_steps=2000):
        self.max_episode_steps = max_episode_steps
        self.env = env
        self.terminal_on_life_loss = terminal_on_life_loss
        self.frame_skip = frame_skip
        self.size = size
        self.count = 0
        obs_dims = self.env.observation_space
        # Two buffers holding the last two raw grayscale frames; they are
        # max-pooled together in _pool_and_resize() to remove sprite flicker.
        self.screen_buffer = [
            np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8),
            np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8)
        ]
        self.game_over = False
        self.lives = 0

    # Expose the gym Env interface as properties (attribute access), matching
    # how gym code reads env.action_space etc.
    @property
    def observation_space(self):
        # Note: the reported stack depth is hard-coded to 4, mirroring the
        # default frame_skip.
        return Box(low=0, high=255, shape=(self.size, self.size, 4),
                   dtype=np.uint8)

    @property
    def action_space(self):
        return self.env.action_space

    @property
    def reward_range(self):
        return self.env.reward_range

    @property
    def metadata(self):
        return self.env.metadata

    def close(self):
        return self.env.close()

    def reset(self):
        self.count = 0
        self.env.reset()
        self.lives = self.env.ale.lives()
        # Fill the first buffer with the initial frame and zero the second.
        self._grayscale_obs(self.screen_buffer[0])
        self.screen_buffer[1].fill(0)
        # The initial observation is the first frame repeated frame_skip times.
        return np.stack([
            self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1)

    def render(self, mode='human'):
        return self.env.render(mode)

    def step(self, action):
        total_reward = 0.
        observation = []
        for t in range(self.frame_skip):
            self.count += 1
            _, reward, terminal, info = self.env.step(action)
            total_reward += reward
            if self.terminal_on_life_loss:
                lives = self.env.ale.lives()
                is_terminal = terminal or lives < self.lives
                self.lives = lives
            else:
                is_terminal = terminal
            if is_terminal:
                break
            elif t >= self.frame_skip - 2:
                # Capture the raw frames of the last two skipped steps so
                # _pool_and_resize() can max-pool over them.
                t_ = t - (self.frame_skip - 2)
                self._grayscale_obs(self.screen_buffer[t_])
                observation.append(self._pool_and_resize())
        # Pad with the last captured frame if the episode ended mid-skip.
        while len(observation) > 0 and len(observation) < self.frame_skip:
            observation.append(observation[-1])
        if len(observation) > 0:
            observation = np.stack(observation, axis=-1)
        else:
            # The episode ended before any frame was captured; rebuild the
            # stack from the current buffers.
            observation = np.stack([
                self._pool_and_resize() for _ in range(self.frame_skip)],
                axis=-1)
        if self.count >= self.max_episode_steps:
            terminal = True
        else:
            terminal = False
        return observation, total_reward, (terminal or is_terminal), info

    def _grayscale_obs(self, output):
        # Write the current ALE screen, in grayscale, into `output` in place.
        self.env.ale.getScreenGrayscale(output)
        return output

    def _pool_and_resize(self):
        # Max-pool the two most recent raw frames to remove Atari sprite
        # flicker, then resize the result to (size, size).
        if self.frame_skip > 1:
            np.maximum(self.screen_buffer[0], self.screen_buffer[1],
                       out=self.screen_buffer[0])
        transformed_image = cv2.resize(self.screen_buffer[0],
                                       (self.size, self.size),
                                       interpolation=cv2.INTER_AREA)
        int_image = np.asarray(transformed_image, dtype=np.uint8)
        # return np.expand_dims(int_image, axis=2)
        return int_image
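

# A minimal rollout sketch, added for illustration and not part of the
# original module. It assumes gym's Atari dependencies and the 'Pong' ROM
# are installed; the random policy below is hypothetical usage, not
# Tianshou's training API.
if __name__ == '__main__':
    env = create_atari_environment('Pong')
    obs = env.reset()  # stacked frames, shape (84, 84, 4), dtype uint8
    done, total_reward = False, 0.
    while not done:
        # Sample a random action and advance one (frame-skipped) step.
        obs, rew, done, info = env.step(env.action_space.sample())
        total_reward += rew
    print('episode reward:', total_reward)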