Tianshou/examples/vizdoom/env.py

import os

import cv2
import gymnasium as gym
import numpy as np
import vizdoom as vzd

from tianshou.env import ShmemVectorEnv

try:
    import envpool
except ImportError:
    envpool = None


def normal_button_comb():
    """Enumerate the discrete action set used for non-battle scenarios.

    Every action is a 3-element list of floats: a one-element flag followed
    by a two-element pair whose entries are never both set. Judging by the
    original variable names these are presumably "move forward" and
    "turn left/right"; the actual button order is defined by the scenario
    ``.cfg`` file, not here.

    :return: the 6 button vectors, with the leading flag varying slowest.
    """
    forward_options = ([0.0], [1.0])
    turn_options = ([0.0, 0.0], [0.0, 1.0], [1.0, 0.0])
    return [fwd + turn for fwd in forward_options for turn in turn_options]


def battle_button_comb():
    """Enumerate the discrete action set used for "battle" scenarios.

    Every action is an 8-element list of floats laid out as three
    two-element pairs followed by two single flags. Within a pair the two
    entries are never both set. Judging by the original variable names the
    layout is presumably (forward/backward, move left/right,
    turn left/right, attack, speed); the actual button order is defined by
    the scenario ``.cfg`` file, not here.

    :return: the 3 * 3 * 3 * 2 * 2 = 108 button vectors. The enumeration
        order (slowest to fastest varying) is: attack, speed, second pair,
        first pair, third pair.
    """
    pair_choices = ([0.0, 0.0], [0.0, 1.0], [1.0, 0.0])
    flag_choices = ([0.0], [1.0])
    # The loop nesting fixes the action indices and therefore must not change.
    return [
        fwd_back + strafe + turn + fire + run
        for fire in flag_choices
        for run in flag_choices
        for strafe in pair_choices
        for fwd_back in pair_choices
        for turn in pair_choices
    ]


class Env(gym.Env):
    """Gymnasium environment wrapping a ViZDoom game.

    Observations are a stack of the most recent ``res[0]`` screen frames,
    each resized to ``res[1]`` x ``res[2]`` (height x width). The reward is
    shaped from changes in three game variables between consecutive steps:
    HEALTH, KILLCOUNT (weighted by 20) and AMMO2.

    :param cfg_path: path of the ViZDoom scenario config. If it contains the
        substring ``"battle"`` the larger battle action set is used and
        health losses are penalised as well.
    :param frameskip: number of game tics each chosen action is repeated for.
    :param res: observation shape ``(stack, height, width)``.
    :param save_lmp: if True, record every episode to ``lmps/episode_<n>.lmp``.
    """

    def __init__(self, cfg_path, frameskip=4, res=(4, 40, 60), save_lmp=False):
        super().__init__()
        self.save_lmp = save_lmp
        # Battle scenarios count health changes in both directions.
        self.health_setting = "battle" in cfg_path
        if save_lmp:
            os.makedirs("lmps", exist_ok=True)
        self.res = res
        self.skip = frameskip
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=res, dtype=np.float32)
        self.game = vzd.DoomGame()
        self.game.load_config(cfg_path)
        self.game.init()
        if self.health_setting:
            self.available_actions = battle_button_comb()
        else:
            self.available_actions = normal_button_comb()
        self.action_num = len(self.available_actions)
        self.action_space = gym.spaces.Discrete(self.action_num)
        self.spec = gym.envs.registration.EnvSpec("vizdoom-v0")
        # Number of episodes started so far; used to name recorded demos.
        self.count = 0

    def get_obs(self):
        """Push the current screen frame onto the frame stack, in place.

        Does nothing when the game has no current state, so the stack keeps
        the last frames that were seen.
        """
        state = self.game.get_state()
        if state is None:
            return
        obs = state.screen_buffer
        # Drop the oldest frame, then append the newest one at the end.
        self.obs_buffer[:-1] = self.obs_buffer[1:]
        # cv2.resize takes (width, height).
        # NOTE(review): assumes the config selects a single-channel screen
        # format so the resized frame is 2-D — confirm against the .cfg files.
        self.obs_buffer[-1] = cv2.resize(obs, (self.res[-1], self.res[-2]))

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        :param seed: optional seed; when given it is forwarded to Gymnasium's
            base ``reset`` and to ViZDoom before the new episode starts.
        :param options: accepted for Gymnasium API compatibility; unused.
        :return: the frame stack (zeros except for the first frame) and an
            empty info dict. The array is the internal buffer and is updated
            in place by later ``step`` calls; copy it if it must be kept.
        """
        super().reset(seed=seed)
        if seed is not None:
            self.game.set_seed(seed)
        if self.save_lmp:
            self.game.new_episode(f"lmps/episode_{self.count}.lmp")
        else:
            self.game.new_episode()
        self.count += 1
        self.obs_buffer = np.zeros(self.res, dtype=np.uint8)
        self.get_obs()
        # Baselines for the reward deltas computed in step().
        self.health = self.game.get_game_variable(vzd.GameVariable.HEALTH)
        self.killcount = self.game.get_game_variable(vzd.GameVariable.KILLCOUNT)
        self.ammo2 = self.game.get_game_variable(vzd.GameVariable.AMMO2)
        return self.obs_buffer, {}

    def step(self, action):
        """Apply one discrete action for ``frameskip`` tics.

        :param action: index into ``available_actions``.
        :return: ``(observation, reward, terminated, truncated, info)``.
            ``info["TimeLimit.truncated"]`` is set to True whenever
            ``truncated`` is True.
        """
        self.game.make_action(self.available_actions[action], self.skip)
        self.get_obs()

        reward = 0.0
        health = self.game.get_game_variable(vzd.GameVariable.HEALTH)
        # Outside battle scenarios only health gains are rewarded.
        if self.health_setting or health > self.health:
            reward += health - self.health
        self.health = health

        killcount = self.game.get_game_variable(vzd.GameVariable.KILLCOUNT)
        reward += 20 * (killcount - self.killcount)
        self.killcount = killcount

        # Ammo counts in both directions: spending it is penalised.
        ammo2 = self.game.get_game_variable(vzd.GameVariable.AMMO2)
        reward += ammo2 - self.ammo2
        self.ammo2 = ammo2

        terminated = False
        truncated = False
        info = {}
        # NOTE(review): ViZDoom appears to return None from get_state() once
        # an episode has finished, which would make the elif below
        # unreachable — confirm before relying on the truncation flag.
        if self.game.is_player_dead() or self.game.get_state() is None:
            terminated = True
        elif self.game.is_episode_finished():
            truncated = True
            info["TimeLimit.truncated"] = True
        return self.obs_buffer, reward, terminated, truncated, info

    def render(self):
        """No-op; this environment does not implement rendering."""

    def close(self):
        """Shut down the underlying ViZDoom game instance."""
        self.game.close()


def make_vizdoom_env(task, frame_skip, res, save_lmp, seed, training_num, test_num):
    """Build the reference, training and test environments for a ViZDoom task.

    Uses envpool when it is importable, otherwise falls back to ``Env``
    instances wrapped in ``ShmemVectorEnv``.

    :param task: scenario name, e.g. ``"D3_battle"``. It selects the envpool
        task id, or ``maps/<task>.cfg`` in the fallback path.
    :param frame_skip: number of tics each action is repeated for.
    :param res: observation shape ``(stack, height, width)``; the envpool path
        only uses ``res[0]`` as the stack size.
    :param save_lmp: whether the test environments record episodes under
        ``lmps/``.
    :param seed: seed passed to the training and test environments.
    :param training_num: number of parallel training environments.
    :param test_num: requested number of parallel test environments; capped
        at the CPU count minus one, but never below one because of that cap.
    :return: ``(env, train_envs, test_envs)``. With envpool, ``env`` is the
        same object as ``train_envs``.
    """
    # os.cpu_count() may return None, and "cpus - 1" is 0 on a single-core
    # host; neither case should leave us with zero test environments.
    cpu_budget = max((os.cpu_count() or 1) - 1, 1)
    test_num = min(cpu_budget, test_num)
    if envpool is not None:
        task_id = "".join([i.capitalize() for i in task.split("_")]) + "-v1"
        lmp_save_dir = "lmps/" if save_lmp else ""
        # Per variable: [weight for an increase, weight for a decrease].
        # NOTE(review): pair semantics inferred from how Env.step shapes its
        # reward — confirm against the envpool ViZDoom documentation.
        reward_config = {
            "KILLCOUNT": [20.0, -20.0],
            "HEALTH": [1.0, 0.0],
            "AMMO2": [1.0, -1.0],
        }
        if "battle" in task:
            reward_config["HEALTH"] = [1.0, -1.0]
        shared_kwargs = {
            "frame_skip": frame_skip,
            "stack_num": res[0],
            "seed": seed,
            "reward_config": reward_config,
            "use_combined_action": True,
            "max_episode_steps": 2625,
            "use_inter_area_resize": False,
        }
        env = train_envs = envpool.make_gymnasium(
            task_id,
            num_envs=training_num,
            **shared_kwargs,
        )
        # Only the test environments record demos.
        test_envs = envpool.make_gymnasium(
            task_id,
            num_envs=test_num,
            lmp_save_dir=lmp_save_dir,
            **shared_kwargs,
        )
    else:
        cfg_path = f"maps/{task}.cfg"
        env = Env(cfg_path, frame_skip, res)
        train_envs = ShmemVectorEnv(
            [lambda: Env(cfg_path, frame_skip, res) for _ in range(training_num)],
        )
        test_envs = ShmemVectorEnv(
            [lambda: Env(cfg_path, frame_skip, res, save_lmp) for _ in range(test_num)],
        )
        train_envs.seed(seed)
        test_envs.seed(seed)
    return env, train_envs, test_envs


if __name__ == "__main__":
    # Manual smoke test: run a fixed action for 4000 steps and dump the last
    # observation to test.png. Use "maps/D1_basic.cfg" instead to exercise the
    # smaller non-battle action set.
    env = Env("maps/D3_battle.cfg", 4, (4, 84, 84))
    print(env.available_actions)
    action_num = env.action_space.n
    # reset() may return a bare observation or a Gymnasium-style (obs, info).
    reset_out = env.reset()
    obs = reset_out[0] if isinstance(reset_out, tuple) else reset_out
    print(env.spec.reward_threshold)
    print(obs.shape, action_num)
    for _ in range(4000):
        # Works for both (obs, rew, done, info) and
        # (obs, rew, terminated, truncated, info) step results.
        obs, rew, *end_flags, info = env.step(0)
        if any(end_flags):
            env.reset()
    print(obs.shape, rew, *end_flags)
    # Stacked frames become image channels; keep at most the first three.
    cv2.imwrite("test.png", obs.transpose(1, 2, 0)[..., :3])
    env.close()
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`import os`
bump to v0.4.3 (#432) * add makefile * bump version * add isort and yapf * update contributing.md * update PR template * spelling check 2021-09-03 05:05:04 +08:00
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`import cv2`
Gymnasium Integration (#789) Changes: - Disclaimer in README - Replaced all occurences of Gym with Gymnasium - Removed code that is now dead since we no longer need to support the old step API - Updated type hints to only allow new step API - Increased required version of envpool to support Gymnasium - Increased required version of PettingZoo to support Gymnasium - Updated `PettingZooEnv` to only use the new step API, removed hack to also support old API - I had to add some `# type: ignore` comments, due to new type hinting in Gymnasium. I'm not that familiar with type hinting but I believe that the issue is on the Gymnasium side and we are looking into it. - Had to update `MyTestEnv` to support `options` kwarg - Skip NNI tests because they still use OpenAI Gym - Also allow `PettingZooEnv` in vector environment - Updated doc page about ReplayBuffer to also talk about terminated and truncated flags. Still need to do: - Update the Jupyter notebooks in docs - Check the entire code base for more dead code (from compatibility stuff) - Check the reset functions of all environments/wrappers in code base to make sure they use the `options` kwarg - Someone might want to check test_env_finite.py - Is it okay to allow `PettingZooEnv` in vector environments? Might need to update docs? 2023-02-03 20:57:27 +01:00			`import gymnasium as gym`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`import numpy as np`
			`import vizdoom as vzd`

use envpool in vizdoom example, update doc (#634) 2022-05-08 12:42:16 -04:00			`from tianshou.env import ShmemVectorEnv`

			`try:`
			`import envpool`
			`except ImportError:`
			`envpool = None`

add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00
			`def normal_button_comb():`
			`actions = []`
			`m_forward = [[0.0], [1.0]]`
			`t_left_right = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]]`
			`for i in m_forward:`
			`for j in t_left_right:`
			`actions.append(i + j)`
			`return actions`


			`def battle_button_comb():`
			`actions = []`
			`m_forward_backward = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]]`
			`m_left_right = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]]`
			`t_left_right = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]]`
			`attack = [[0.0], [1.0]]`
			`speed = [[0.0], [1.0]]`

			`for m in attack:`
			`for n in speed:`
			`for j in m_left_right:`
			`for i in m_forward_backward:`
			`for k in t_left_right:`
			`actions.append(i + j + k + m + n)`
			`return actions`


			`class Env(gym.Env):`
bump to v0.4.3 (#432) * add makefile * bump version * add isort and yapf * update contributing.md * update PR template * spelling check 2021-09-03 05:05:04 +08:00			`def __init__(self, cfg_path, frameskip=4, res=(4, 40, 60), save_lmp=False):`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`super().__init__()`
			`self.save_lmp = save_lmp`
			`self.health_setting = "battle" in cfg_path`
			`if save_lmp:`
			`os.makedirs("lmps", exist_ok=True)`
			`self.res = res`
			`self.skip = frameskip`
Python 3.9, black + ruff formatting (#921) Preparation for #914 and #920 Changes formatting to ruff and black. Remove python 3.8 ## Additional Changes - Removed flake8 dependencies - Adjusted pre-commit. Now CI and Make use pre-commit, reducing the duplication of linting calls - Removed check-docstyle option (ruff is doing that) - Merged format and lint. In CI the format-lint step fails if any changes are done, so it fulfills the lint functionality. --------- Co-authored-by: Jiayi Weng <jiayi@openai.com> 2023-08-25 23:40:56 +02:00			`self.observation_space = gym.spaces.Box(low=0, high=255, shape=res, dtype=np.float32)`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`self.game = vzd.DoomGame()`
			`self.game.load_config(cfg_path)`
			`self.game.init()`
			`if "battle" in cfg_path:`
			`self.available_actions = battle_button_comb()`
			`else:`
			`self.available_actions = normal_button_comb()`
			`self.action_num = len(self.available_actions)`
			`self.action_space = gym.spaces.Discrete(self.action_num)`
			`self.spec = gym.envs.registration.EnvSpec("vizdoom-v0")`
			`self.count = 0`

			`def get_obs(self):`
			`state = self.game.get_state()`
			`if state is None:`
			`return`
			`obs = state.screen_buffer`
			`self.obs_buffer[:-1] = self.obs_buffer[1:]`
			`self.obs_buffer[-1] = cv2.resize(obs, (self.res[-1], self.res[-2]))`

			`def reset(self):`
			`if self.save_lmp:`
			`self.game.new_episode(f"lmps/episode_{self.count}.lmp")`
			`else:`
			`self.game.new_episode()`
			`self.count += 1`
			`self.obs_buffer = np.zeros(self.res, dtype=np.uint8)`
			`self.get_obs()`
			`self.health = self.game.get_game_variable(vzd.GameVariable.HEALTH)`
bump to v0.4.3 (#432) * add makefile * bump version * add isort and yapf * update contributing.md * update PR template * spelling check 2021-09-03 05:05:04 +08:00			`self.killcount = self.game.get_game_variable(vzd.GameVariable.KILLCOUNT)`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`self.ammo2 = self.game.get_game_variable(vzd.GameVariable.AMMO2)`
			`return self.obs_buffer`

			`def step(self, action):`
			`self.game.make_action(self.available_actions[action], self.skip)`
			`reward = 0.0`
			`self.get_obs()`
			`health = self.game.get_game_variable(vzd.GameVariable.HEALTH)`
Python 3.9, black + ruff formatting (#921) Preparation for #914 and #920 Changes formatting to ruff and black. Remove python 3.8 ## Additional Changes - Removed flake8 dependencies - Adjusted pre-commit. Now CI and Make use pre-commit, reducing the duplication of linting calls - Removed check-docstyle option (ruff is doing that) - Merged format and lint. In CI the format-lint step fails if any changes are done, so it fulfills the lint functionality. --------- Co-authored-by: Jiayi Weng <jiayi@openai.com> 2023-08-25 23:40:56 +02:00			`if self.health_setting or health > self.health: # positive health reward only for d1/d2`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`reward += health - self.health`
			`self.health = health`
			`killcount = self.game.get_game_variable(vzd.GameVariable.KILLCOUNT)`
			`reward += 20 * (killcount - self.killcount)`
			`self.killcount = killcount`
			`ammo2 = self.game.get_game_variable(vzd.GameVariable.AMMO2)`
			`# if ammo2 > self.ammo2:`
			`reward += ammo2 - self.ammo2`
			`self.ammo2 = ammo2`
			`done = False`
			`info = {}`
			`if self.game.is_player_dead() or self.game.get_state() is None:`
			`done = True`
			`elif self.game.is_episode_finished():`
			`done = True`
			`info["TimeLimit.truncated"] = True`
			`return self.obs_buffer, reward, done, info`

			`def render(self):`
			`pass`

			`def close(self):`
			`self.game.close()`


use envpool in vizdoom example, update doc (#634) 2022-05-08 12:42:16 -04:00			`def make_vizdoom_env(task, frame_skip, res, save_lmp, seed, training_num, test_num):`
			`test_num = min(os.cpu_count() - 1, test_num)`
			`if envpool is not None:`
			`task_id = "".join([i.capitalize() for i in task.split("_")]) + "-v1"`
			`lmp_save_dir = "lmps/" if save_lmp else ""`
			`reward_config = {`
			`"KILLCOUNT": [20.0, -20.0],`
			`"HEALTH": [1.0, 0.0],`
			`"AMMO2": [1.0, -1.0],`
			`}`
			`if "battle" in task:`
			`reward_config["HEALTH"] = [1.0, -1.0]`
Gymnasium Integration (#789) Changes: - Disclaimer in README - Replaced all occurences of Gym with Gymnasium - Removed code that is now dead since we no longer need to support the old step API - Updated type hints to only allow new step API - Increased required version of envpool to support Gymnasium - Increased required version of PettingZoo to support Gymnasium - Updated `PettingZooEnv` to only use the new step API, removed hack to also support old API - I had to add some `# type: ignore` comments, due to new type hinting in Gymnasium. I'm not that familiar with type hinting but I believe that the issue is on the Gymnasium side and we are looking into it. - Had to update `MyTestEnv` to support `options` kwarg - Skip NNI tests because they still use OpenAI Gym - Also allow `PettingZooEnv` in vector environment - Updated doc page about ReplayBuffer to also talk about terminated and truncated flags. Still need to do: - Update the Jupyter notebooks in docs - Check the entire code base for more dead code (from compatibility stuff) - Check the reset functions of all environments/wrappers in code base to make sure they use the `options` kwarg - Someone might want to check test_env_finite.py - Is it okay to allow `PettingZooEnv` in vector environments? Might need to update docs? 2023-02-03 20:57:27 +01:00			`env = train_envs = envpool.make_gymnasium(`
use envpool in vizdoom example, update doc (#634) 2022-05-08 12:42:16 -04:00			`task_id,`
			`frame_skip=frame_skip,`
			`stack_num=res[0],`
			`seed=seed,`
			`num_envs=training_num,`
			`reward_config=reward_config,`
			`use_combined_action=True,`
			`max_episode_steps=2625,`
			`use_inter_area_resize=False,`
			`)`
Gymnasium Integration (#789) Changes: - Disclaimer in README - Replaced all occurences of Gym with Gymnasium - Removed code that is now dead since we no longer need to support the old step API - Updated type hints to only allow new step API - Increased required version of envpool to support Gymnasium - Increased required version of PettingZoo to support Gymnasium - Updated `PettingZooEnv` to only use the new step API, removed hack to also support old API - I had to add some `# type: ignore` comments, due to new type hinting in Gymnasium. I'm not that familiar with type hinting but I believe that the issue is on the Gymnasium side and we are looking into it. - Had to update `MyTestEnv` to support `options` kwarg - Skip NNI tests because they still use OpenAI Gym - Also allow `PettingZooEnv` in vector environment - Updated doc page about ReplayBuffer to also talk about terminated and truncated flags. Still need to do: - Update the Jupyter notebooks in docs - Check the entire code base for more dead code (from compatibility stuff) - Check the reset functions of all environments/wrappers in code base to make sure they use the `options` kwarg - Someone might want to check test_env_finite.py - Is it okay to allow `PettingZooEnv` in vector environments? Might need to update docs? 2023-02-03 20:57:27 +01:00			`test_envs = envpool.make_gymnasium(`
use envpool in vizdoom example, update doc (#634) 2022-05-08 12:42:16 -04:00			`task_id,`
			`frame_skip=frame_skip,`
			`stack_num=res[0],`
			`lmp_save_dir=lmp_save_dir,`
			`seed=seed,`
			`num_envs=test_num,`
			`reward_config=reward_config,`
			`use_combined_action=True,`
			`max_episode_steps=2625,`
			`use_inter_area_resize=False,`
			`)`
			`else:`
			`cfg_path = f"maps/{task}.cfg"`
			`env = Env(cfg_path, frame_skip, res)`
			`train_envs = ShmemVectorEnv(`
Python 3.9, black + ruff formatting (#921) Preparation for #914 and #920 Changes formatting to ruff and black. Remove python 3.8 ## Additional Changes - Removed flake8 dependencies - Adjusted pre-commit. Now CI and Make use pre-commit, reducing the duplication of linting calls - Removed check-docstyle option (ruff is doing that) - Merged format and lint. In CI the format-lint step fails if any changes are done, so it fulfills the lint functionality. --------- Co-authored-by: Jiayi Weng <jiayi@openai.com> 2023-08-25 23:40:56 +02:00			`[lambda: Env(cfg_path, frame_skip, res) for _ in range(training_num)],`
use envpool in vizdoom example, update doc (#634) 2022-05-08 12:42:16 -04:00			`)`
			`test_envs = ShmemVectorEnv(`
Python 3.9, black + ruff formatting (#921) Preparation for #914 and #920 Changes formatting to ruff and black. Remove python 3.8 ## Additional Changes - Removed flake8 dependencies - Adjusted pre-commit. Now CI and Make use pre-commit, reducing the duplication of linting calls - Removed check-docstyle option (ruff is doing that) - Merged format and lint. In CI the format-lint step fails if any changes are done, so it fulfills the lint functionality. --------- Co-authored-by: Jiayi Weng <jiayi@openai.com> 2023-08-25 23:40:56 +02:00			`[lambda: Env(cfg_path, frame_skip, res, save_lmp) for _ in range(test_num)],`
use envpool in vizdoom example, update doc (#634) 2022-05-08 12:42:16 -04:00			`)`
			`train_envs.seed(seed)`
			`test_envs.seed(seed)`
			`return env, train_envs, test_envs`


Python 3.9, black + ruff formatting (#921) Preparation for #914 and #920 Changes formatting to ruff and black. Remove python 3.8 ## Additional Changes - Removed flake8 dependencies - Adjusted pre-commit. Now CI and Make use pre-commit, reducing the duplication of linting calls - Removed check-docstyle option (ruff is doing that) - Merged format and lint. In CI the format-lint step fails if any changes are done, so it fulfills the lint functionality. --------- Co-authored-by: Jiayi Weng <jiayi@openai.com> 2023-08-25 23:40:56 +02:00			`if __name__ == "__main__":`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`# env = Env("maps/D1_basic.cfg", 4, (4, 84, 84))`
			`env = Env("maps/D3_battle.cfg", 4, (4, 84, 84))`
			`print(env.available_actions)`
			`action_num = env.action_space.n`
			`obs = env.reset()`
			`print(env.spec.reward_threshold)`
			`print(obs.shape, action_num)`
bump to v0.4.3 (#432) * add makefile * bump version * add isort and yapf * update contributing.md * update PR template * spelling check 2021-09-03 05:05:04 +08:00			`for _ in range(4000):`
Method to compute actions from observations (#991) This PR adds a new method for getting actions from an env's observation and info. This is useful for standard inference and stands in contrast to batch-based methods that are currently used in training and evaluation. Without this, users have to do some kind of gymnastics to actually perform inference with a trained policy. I have also added a test for the new method. In future PRs, this method should be included in the examples (in the the "watch" section). To add this required improving multiple typing things and, importantly, _simplifying the signature of `forward` in many policies!_ This is a breaking change, but it will likely affect no users. The `input` parameter of forward was a rather hacky mechanism, I believe it is good that it's gone now. It will also help with #948 . The main functional change is the addition of `compute_action` to `BasePolicy`. Other minor changes: - improvements in typing - updated PR and Issue templates - Improved handling of `max_action_num` Closes #981 2023-11-16 18:27:53 +01:00			`obs, rew, terminated, truncated, info = env.step(0)`
			`if terminated or truncated:`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`env.reset()`
Method to compute actions from observations (#991) This PR adds a new method for getting actions from an env's observation and info. This is useful for standard inference and stands in contrast to batch-based methods that are currently used in training and evaluation. Without this, users have to do some kind of gymnastics to actually perform inference with a trained policy. I have also added a test for the new method. In future PRs, this method should be included in the examples (in the the "watch" section). To add this required improving multiple typing things and, importantly, _simplifying the signature of `forward` in many policies!_ This is a breaking change, but it will likely affect no users. The `input` parameter of forward was a rather hacky mechanism, I believe it is good that it's gone now. It will also help with #948 . The main functional change is the addition of `compute_action` to `BasePolicy`. Other minor changes: - improvements in typing - updated PR and Issue templates - Improved handling of `max_action_num` Closes #981 2023-11-16 18:27:53 +01:00			`print(obs.shape, rew, terminated, truncated)`
add vizdoom example, bump version to 0.4.2 (#384) 2021-06-26 18:08:41 +08:00			`cv2.imwrite("test.png", obs.transpose(1, 2, 0)[..., :3])`