Tianshou/tianshou/env/venv_wrappers.py
Michael Panchenko 2cc34fb72b
Poetry install, remove gym, bump python (#925)
Closes #914 

Additional changes:

- Deprecate Python versions below 3.11
- Remove 3rd party and throughput tests. This simplifies install and
test pipeline
- Remove gym compatibility and shimmy
- Format with 3.11 conventions. In particular, add `zip(...,
strict=True/False)` where possible

Since the additional tests and gym were complicating the CI pipeline
(flaky and dist-dependent), it didn't make sense to work on fixing the
current tests in this PR to then just delete them in the next one. So
this PR changes the build and removes these tests at the same time.
2023-09-05 14:34:23 -07:00

121 lines
3.7 KiB
Python

from typing import Any
import numpy as np
import torch
from tianshou.env.utils import gym_new_venv_step_type
from tianshou.env.venvs import GYM_RESERVED_KEYS, BaseVectorEnv
from tianshou.utils import RunningMeanStd
class VectorEnvWrapper(BaseVectorEnv):
    """Base class for wrappers around a vectorized environment.

    Every method defined here forwards to the wrapped ``venv``; subclasses
    override the ones whose behavior they want to change.
    """

    # Note: the parent constructor is deliberately not called. This class is a
    # wrapper with an overridden __getattribute__, not a "true" subclass of
    # BaseVectorEnv; it only extends that interface so that it can be used as a
    # drop-in replacement.
    # noinspection PyMissingConstructor
    def __init__(self, venv: BaseVectorEnv) -> None:
        """Store the wrapped ``venv`` and mirror its ``is_async`` flag."""
        self.venv = venv
        self.is_async = venv.is_async

    def __len__(self) -> int:
        """Return ``len()`` of the wrapped venv."""
        wrapped = self.venv
        return len(wrapped)

    def __getattribute__(self, key: str) -> Any:
        """Look up ``key``, taking reserved gym.Env keys from the wrapped venv."""
        if key not in GYM_RESERVED_KEYS:
            return super().__getattribute__(key)
        # reserved keys in gym.Env are answered by the wrapped venv
        return getattr(self.venv, key)

    def get_env_attr(
        self,
        key: str,
        id: int | list[int] | np.ndarray | None = None,
    ) -> list[Any]:
        """Forward ``get_env_attr(key, id)`` to the wrapped venv."""
        attr_values = self.venv.get_env_attr(key, id)
        return attr_values

    def set_env_attr(
        self,
        key: str,
        value: Any,
        id: int | list[int] | np.ndarray | None = None,
    ) -> None:
        """Forward ``set_env_attr(key, value, id)`` to the wrapped venv."""
        outcome = self.venv.set_env_attr(key, value, id)
        return outcome

    def reset(
        self,
        id: int | list[int] | np.ndarray | None = None,
        **kwargs: Any,
    ) -> tuple[np.ndarray, dict | list[dict]]:
        """Forward ``reset(id, **kwargs)`` to the wrapped venv."""
        reset_result = self.venv.reset(id, **kwargs)
        return reset_result

    def step(
        self,
        action: np.ndarray | torch.Tensor,
        id: int | list[int] | np.ndarray | None = None,
    ) -> gym_new_venv_step_type:
        """Forward ``step(action, id)`` to the wrapped venv."""
        step_result = self.venv.step(action, id)
        return step_result

    def seed(self, seed: int | list[int] | None = None) -> list[list[int] | None]:
        """Forward ``seed(seed)`` to the wrapped venv."""
        seeds = self.venv.seed(seed)
        return seeds

    def render(self, **kwargs: Any) -> list[Any]:
        """Forward ``render(**kwargs)`` to the wrapped venv."""
        frames = self.venv.render(**kwargs)
        return frames

    def close(self) -> None:
        """Close the wrapped venv."""
        self.venv.close()
class VectorEnvNormObs(VectorEnvWrapper):
    """An observation normalization wrapper for vectorized environments.

    Observations returned by ``reset`` and ``step`` of the wrapped venv are
    passed through ``obs_rms.norm``; the running statistics are fed with each
    new observation while ``update_obs_rms`` is true.

    :param venv: the vectorized environment to wrap.
    :param bool update_obs_rms: whether to update obs_rms. Default to True.
    """

    def __init__(self, venv: BaseVectorEnv, update_obs_rms: bool = True) -> None:
        super().__init__(venv)
        # initialize observation running mean/std
        self.update_obs_rms = update_obs_rms
        self.obs_rms = RunningMeanStd()

    def reset(
        self,
        id: int | list[int] | np.ndarray | None = None,
        **kwargs: Any,
    ) -> tuple[np.ndarray, dict | list[dict]]:
        """Reset the wrapped venv and return the normalized observation.

        :param id: forwarded unchanged to the wrapped venv's ``reset``.
        :param kwargs: forwarded unchanged to the wrapped venv's ``reset``.
        :return: the normalized observation and the info returned by the venv.
        :raises TypeError: if the wrapped venv returns a tuple observation.
        """
        obs, info = self.venv.reset(id, **kwargs)
        if isinstance(obs, tuple):  # type: ignore
            # One single message string: passing the two halves as separate
            # arguments would make the exception text render as a tuple.
            raise TypeError(
                "Tuple observation space is not supported. "
                "Please change it to array or dict space",
            )
        if self.obs_rms and self.update_obs_rms:
            self.obs_rms.update(obs)
        obs = self._norm_obs(obs)
        return obs, info

    def step(
        self,
        action: np.ndarray | torch.Tensor,
        id: int | list[int] | np.ndarray | None = None,
    ) -> gym_new_venv_step_type:
        """Step the wrapped venv and normalize the returned observation.

        :param action: forwarded unchanged to the wrapped venv's ``step``.
        :param id: forwarded unchanged to the wrapped venv's ``step``.
        :return: the venv's step results with the first element (the
            observation) replaced by its normalized value.
        """
        step_results = self.venv.step(action, id)
        obs = step_results[0]
        if self.obs_rms and self.update_obs_rms:
            self.obs_rms.update(obs)
        # Everything after the observation is passed through untouched.
        return (self._norm_obs(obs), *step_results[1:])

    def _norm_obs(self, obs: np.ndarray) -> np.ndarray:
        """Return ``obs`` normalized by ``obs_rms``, or unchanged if it is falsy."""
        if self.obs_rms:
            return self.obs_rms.norm(obs)  # type: ignore
        return obs

    def set_obs_rms(self, obs_rms: RunningMeanStd) -> None:
        """Set with given observation running mean/std."""
        self.obs_rms = obs_rms

    def get_obs_rms(self) -> RunningMeanStd:
        """Return observation running mean/std."""
        return self.obs_rms