Tianshou/tianshou/highlevel/env.py

from abc import ABC, abstractmethod
from collections.abc import Sequence
from enum import Enum
from typing import Any

import gymnasium as gym

from tianshou.env import BaseVectorEnv
from tianshou.highlevel.persistence import PersistableConfigProtocol

# Type alias for a shape value: either a single int or a sequence of ints.
# Used below as the return type of the action/observation shape getters.
TShape = int | Sequence[int]


class EnvType(Enum):
    """Enumeration of the supported environment categories."""

    CONTINUOUS = "continuous"
    DISCRETE = "discrete"

    def is_discrete(self) -> bool:
        """Return True if this member is :attr:`DISCRETE`."""
        # Enum members are singletons, so identity is the idiomatic comparison.
        return self is EnvType.DISCRETE

    def is_continuous(self) -> bool:
        """Return True if this member is :attr:`CONTINUOUS`."""
        return self is EnvType.CONTINUOUS

    def assert_continuous(self, requiring_entity: Any) -> None:
        """Raise an error unless this member is :attr:`CONTINUOUS`.

        :param requiring_entity: the object that imposes the requirement; it is
            interpolated into the error message.
        :raises AssertionError: if this member is not :attr:`CONTINUOUS`.
        """
        if not self.is_continuous():
            raise AssertionError(f"{requiring_entity} requires continuous environments")

    def assert_discrete(self, requiring_entity: Any) -> None:
        """Raise an error unless this member is :attr:`DISCRETE`.

        :param requiring_entity: the object that imposes the requirement; it is
            interpolated into the error message.
        :raises AssertionError: if this member is not :attr:`DISCRETE`.
        """
        if not self.is_discrete():
            raise AssertionError(f"{requiring_entity} requires discrete environments")


class Environments(ABC):
    """Holder for a single environment plus the train and test vector environments.

    Subclasses provide the action/observation shapes and the environment type.
    """

    def __init__(self, env: gym.Env | None, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):
        """Store the given environments on the instance.

        :param env: a single environment (may be None according to the signature);
            its spaces are read by :meth:`get_action_space` and
            :meth:`get_observation_space`.
        :param train_envs: vector environments stored as ``train_envs``.
        :param test_envs: vector environments stored as ``test_envs``.
        """
        self.env, self.train_envs, self.test_envs = env, train_envs, test_envs

    def info(self) -> dict[str, Any]:
        """Return a dictionary with the entries ``action_shape`` and ``state_shape``."""
        action_shape = self.get_action_shape()
        observation_shape = self.get_observation_shape()
        return {"action_shape": action_shape, "state_shape": observation_shape}

    @abstractmethod
    def get_action_shape(self) -> TShape:
        """Return the shape of the action space."""

    @abstractmethod
    def get_observation_shape(self) -> TShape:
        """Return the shape of the observation space."""

    def get_action_space(self) -> gym.Space:
        """Return ``action_space`` of the stored environment.

        No check for ``self.env`` being None is performed here.
        """
        stored_env = self.env
        return stored_env.action_space

    def get_observation_space(self) -> gym.Space:
        """Return ``observation_space`` of the stored environment.

        No check for ``self.env`` being None is performed here.
        """
        stored_env = self.env
        return stored_env.observation_space

    @abstractmethod
    def get_type(self) -> EnvType:
        """Return the :class:`EnvType` of these environments."""


class ContinuousEnvironments(Environments):
    """Environments whose single environment has a ``gym.spaces.Box`` action space.

    On construction, the attributes ``state_shape``, ``action_shape`` and
    ``max_action`` are derived from the given environment.
    """

    def __init__(self, env: gym.Env | None, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):
        """Store the environments and extract shape/limit information from ``env``.

        :param env: the environment from which shapes and ``max_action`` are read.
        :param train_envs: vector environments stored as ``train_envs``.
        :param test_envs: vector environments stored as ``test_envs``.
        :raises ValueError: see :meth:`_get_continuous_env_info`.
        """
        super().__init__(env, train_envs, test_envs)
        extracted = self._get_continuous_env_info(env)
        self.state_shape, self.action_shape, self.max_action = extracted

    def info(self):
        """Return the base info dictionary extended by the entry ``max_action``."""
        result = super().info()
        result.update(max_action=self.max_action)
        return result

    @staticmethod
    def _get_continuous_env_info(
        env: gym.Env,
    ) -> tuple[tuple[int, ...], tuple[int, ...], float]:
        """Read observation shape, action shape and maximum action from ``env``.

        :param env: the environment to inspect.
        :return: the tuple ``(state_shape, action_shape, max_action)``, where
            ``max_action`` is the first entry of the action space's ``high``.
        :raises ValueError: if the action space is not a ``gym.spaces.Box`` or
            if no observation shape could be determined.
        """
        if not isinstance(env.action_space, gym.spaces.Box):
            raise ValueError(
                "Only environments with continuous action space are supported here. "
                f"But got env with action space: {env.action_space.__class__}.",
            )
        obs_space = env.observation_space
        # Fall back to the attribute `n` when `shape` is falsy (None or empty).
        state_shape = obs_space.shape or obs_space.n
        if not state_shape:
            raise ValueError("Observation space shape is not defined")
        act_space = env.action_space
        return state_shape, act_space.shape, act_space.high[0]

    def get_action_shape(self) -> TShape:
        """Return the ``action_shape`` determined at construction."""
        return self.action_shape

    def get_observation_shape(self) -> TShape:
        """Return the ``state_shape`` determined at construction."""
        return self.state_shape

    def get_type(self) -> EnvType:
        """Return :attr:`EnvType.CONTINUOUS`."""
        return EnvType.CONTINUOUS


class DiscreteEnvironments(Environments):
    """Environments reporting :attr:`EnvType.DISCRETE`.

    On construction, the attributes ``observation_shape`` and ``action_shape``
    are derived from the spaces of the given environment.
    """

    def __init__(self, env: gym.Env | None, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):
        """Store the environments and read the shapes from the spaces of ``env``.

        :param env: the environment whose observation and action spaces are read.
        :param train_envs: vector environments stored as ``train_envs``.
        :param test_envs: vector environments stored as ``test_envs``.
        """
        super().__init__(env, train_envs, test_envs)
        # For each space, use `shape` if it is truthy, else the attribute `n`.
        obs_space = env.observation_space
        self.observation_shape = obs_space.shape or obs_space.n
        act_space = env.action_space
        self.action_shape = act_space.shape or act_space.n

    def get_action_shape(self) -> TShape:
        """Return the ``action_shape`` determined at construction."""
        return self.action_shape

    def get_observation_shape(self) -> TShape:
        """Return the ``observation_shape`` determined at construction."""
        return self.observation_shape

    def get_type(self) -> EnvType:
        """Return :attr:`EnvType.DISCRETE`."""
        return EnvType.DISCRETE


class EnvFactory(ABC):
    """Abstract factory for :class:`Environments`.

    Instances are callable; calling one delegates to :meth:`create_envs`.
    """

    @abstractmethod
    def create_envs(self, config: PersistableConfigProtocol | None = None) -> Environments:
        """Create the environments.

        :param config: an optional persistable configuration object; how it is
            used is up to the implementation.
        :return: the created environments.
        """

    def __call__(self, config: PersistableConfigProtocol | None = None) -> Environments:
        """Call :meth:`create_envs`, passing ``config`` as a keyword argument.

        :param config: forwarded unchanged to :meth:`create_envs`.
        :return: whatever :meth:`create_envs` returns.
        """
        created = self.create_envs(config=config)
        return created
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`from abc import ABC, abstractmethod`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`from collections.abc import Sequence`
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`from enum import Enum`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`from typing import Any`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00
			`import gymnasium as gym`

			`from tianshou.env import BaseVectorEnv`
Add alternative functional interface for environment creation where a persistable configuration object is passed as an argument, as this can help to ensure persistability (making the requirement explicit) 2023-09-27 14:10:45 +02:00			`from tianshou.highlevel.persistence import PersistableConfigProtocol`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`TShape = int \| Sequence[int]`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00

Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`class EnvType(Enum):`
			`CONTINUOUS = "continuous"`
			`DISCRETE = "discrete"`

			`def is_discrete(self):`
			`return self == EnvType.DISCRETE`

			`def is_continuous(self):`
			`return self == EnvType.CONTINUOUS`

Add high-level API support for TD3 * Created mixins for agent factories to reduce code duplication * Further factorised params & mixins for experiment factories * Additional parameter abstractions * Implement high-level MuJoCo TD3 example 2023-09-26 15:35:18 +02:00			`def assert_continuous(self, requiring_entity: Any):`
			`if not self.is_continuous():`
			`raise AssertionError(f"{requiring_entity} requires continuous environments")`

			`def assert_discrete(self, requiring_entity: Any):`
			`if not self.is_discrete():`
			`raise AssertionError(f"{requiring_entity} requires discrete environments")`

Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`class Environments(ABC):`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`def __init__(self, env: gym.Env \| None, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`self.env = env`
			`self.train_envs = train_envs`
			`self.test_envs = test_envs`

Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`def info(self) -> dict[str, Any]:`
Add high-level API support for TD3 * Created mixins for agent factories to reduce code duplication * Further factorised params & mixins for experiment factories * Additional parameter abstractions * Implement high-level MuJoCo TD3 example 2023-09-26 15:35:18 +02:00			`return {`
			`"action_shape": self.get_action_shape(),`
			`"state_shape": self.get_observation_shape(),`
			`}`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00
			`@abstractmethod`
			`def get_action_shape(self) -> TShape:`
			`pass`

			`@abstractmethod`
Improve high-level policy parametrisation Policy objects are now parametrised by converting the parameter dataclass instances to kwargs, using some injectable conversions along the way 2023-09-25 17:56:37 +02:00			`def get_observation_shape(self) -> TShape:`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`pass`

			`def get_action_space(self) -> gym.Space:`
			`return self.env.action_space`

Improve high-level policy parametrisation Policy objects are now parametrised by converting the parameter dataclass instances to kwargs, using some injectable conversions along the way 2023-09-25 17:56:37 +02:00			`def get_observation_space(self) -> gym.Space:`
			`return self.env.observation_space`

Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`@abstractmethod`
			`def get_type(self) -> EnvType:`
			`pass`

Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00
			`class ContinuousEnvironments(Environments):`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`def __init__(self, env: gym.Env \| None, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`super().__init__(env, train_envs, test_envs)`
			`self.state_shape, self.action_shape, self.max_action = self._get_continuous_env_info(env)`

			`def info(self):`
			`d = super().info()`
			`d["max_action"] = self.max_action`
			`return d`

			`@staticmethod`
			`def _get_continuous_env_info(`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`env: gym.Env,`
			`) -> tuple[tuple[int, ...], tuple[int, ...], float]:`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`if not isinstance(env.action_space, gym.spaces.Box):`
			`raise ValueError(`
			`"Only environments with continuous action space are supported here. "`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`f"But got env with action space: {env.action_space.__class__}.",`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`)`
			`state_shape = env.observation_space.shape or env.observation_space.n`
			`if not state_shape:`
			`raise ValueError("Observation space shape is not defined")`
			`action_shape = env.action_space.shape`
			`max_action = env.action_space.high[0]`
			`return state_shape, action_shape, max_action`

			`def get_action_shape(self) -> TShape:`
			`return self.action_shape`

Improve high-level policy parametrisation Policy objects are now parametrised by converting the parameter dataclass instances to kwargs, using some injectable conversions along the way 2023-09-25 17:56:37 +02:00			`def get_observation_shape(self) -> TShape:`
Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`return self.state_shape`

Add high-level API support for TD3 * Created mixins for agent factories to reduce code duplication * Further factorised params & mixins for experiment factories * Additional parameter abstractions * Implement high-level MuJoCo TD3 example 2023-09-26 15:35:18 +02:00			`def get_type(self) -> EnvType:`
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`return EnvType.CONTINUOUS`

Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00
Add support for discrete PPO * Refactored module `module` (split into submodules) * Basic support for discrete environments * Implement Atari env. factory * Implement DQN-based actor factory * Implement notion of reusing agent preprocessing network for critic * Add example atari_ppo_hl 2023-09-28 20:07:52 +02:00			`class DiscreteEnvironments(Environments):`
			`def __init__(self, env: gym.Env \| None, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):`
			`super().__init__(env, train_envs, test_envs)`
			`self.observation_shape = env.observation_space.shape or env.observation_space.n`
			`self.action_shape = env.action_space.shape or env.action_space.n`

			`def get_action_shape(self) -> TShape:`
			`return self.action_shape`

			`def get_observation_shape(self) -> TShape:`
			`return self.observation_shape`

			`def get_type(self) -> EnvType:`
			`return EnvType.DISCRETE`


Initial high-level interfaces, demonstrated in mujoco_ppo_hl 2023-09-19 18:53:11 +02:00			`class EnvFactory(ABC):`
			`@abstractmethod`
Add alternative functional interface for environment creation where a persistable configuration object is passed as an argument, as this can help to ensure persistability (making the requirement explicit) 2023-09-27 14:10:45 +02:00			`def create_envs(self, config: PersistableConfigProtocol \| None = None) -> Environments:`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`pass`
Add alternative functional interface for environment creation where a persistable configuration object is passed as an argument, as this can help to ensure persistability (making the requirement explicit) 2023-09-27 14:10:45 +02:00
			`def __call__(self, config: PersistableConfigProtocol \| None = None) -> Environments:`
			`return self.create_envs(config=config)`