fixed typo in rainbow DQN paper reference (#569)

* fixed typo in rainbow DQN paper ref

* fix gym==0.23 ci failure

Co-authored-by: Jiayi Weng <trinkle23897@gmail.com>
Andrea Boscolo Camiletto 2022-03-16 14:38:51 +01:00 committed by GitHub
parent 39f8391cfb
commit 2336a7db1b
8 changed files with 15 additions and 14 deletions

View File

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020 Tianshou contributors
+Copyright (c) 2022 Tianshou contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

View File

@@ -13,7 +13,7 @@ Welcome to Tianshou!
 * :class:`~tianshou.policy.DQNPolicy` `Double DQN <https://arxiv.org/pdf/1509.06461.pdf>`_
 * :class:`~tianshou.policy.DQNPolicy` `Dueling DQN <https://arxiv.org/pdf/1511.06581.pdf>`_
 * :class:`~tianshou.policy.C51Policy` `Categorical DQN <https://arxiv.org/pdf/1707.06887.pdf>`_
-* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1707.02298.pdf>`_
+* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1710.02298.pdf>`_
 * :class:`~tianshou.policy.QRDQNPolicy` `Quantile Regression DQN <https://arxiv.org/pdf/1710.10044.pdf>`_
 * :class:`~tianshou.policy.IQNPolicy` `Implicit Quantile Network <https://arxiv.org/pdf/1806.06923.pdf>`_
 * :class:`~tianshou.policy.FQFPolicy` `Fully-parameterized Quantile Function <https://arxiv.org/pdf/1911.02140.pdf>`_

View File

@@ -64,14 +64,15 @@ class Collector(object):
         super().__init__()
         if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
             warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
-            env = DummyVectorEnv([lambda: env])
-        self.env = env
-        self.env_num = len(env)
+            self.env = DummyVectorEnv([lambda: env])  # type: ignore
+        else:
+            self.env = env  # type: ignore
+        self.env_num = len(self.env)
         self.exploration_noise = exploration_noise
         self._assign_buffer(buffer)
         self.policy = policy
         self.preprocess_fn = preprocess_fn
-        self._action_space = env.action_space
+        self._action_space = self.env.action_space
         # avoid creating attribute outside __init__
         self.reset(False)
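The new branch above makes the auto-wrapping explicit: a bare gym.Env handed to Collector is wrapped in a single-worker DummyVectorEnv, while anything that already behaves like a vector env is stored as-is. A minimal sketch of how this looks from the caller's side; the environment name, worker count, buffer sizes, and the `policy` object are illustrative placeholders, not part of this diff:

    import gym

    from tianshou.data import Collector, VectorReplayBuffer
    from tianshou.env import DummyVectorEnv

    # Passing a bare gym.Env: Collector emits the warning above and wraps it
    # in a DummyVectorEnv of size 1 behind the scenes.
    single_env = gym.make("CartPole-v1")
    # collector = Collector(policy, single_env)  # `policy` defined elsewhere

    # Passing an explicit vector env of 4 workers skips the warning, and
    # env_num / action_space then come straight from the wrapper.
    vec_env = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(4)])
    # collector = Collector(policy, vec_env, VectorReplayBuffer(20000, 4))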

View File

@@ -6,7 +6,7 @@ from pettingzoo.utils.env import AECEnv
 from pettingzoo.utils.wrappers import BaseWrapper
 
 
-class PettingZooEnv(AECEnv, gym.Env, ABC):
+class PettingZooEnv(AECEnv, ABC):
     """The interface for petting zoo environments.
 
     Multi-agent environments must be wrapped as
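Apart from the dropped gym.Env base class, the wrapper is used the same way as before. A minimal usage sketch, assuming the tic-tac-toe game from pettingzoo.classic (the game choice is illustrative, not part of this commit):

    from pettingzoo.classic import tictactoe_v3

    from tianshou.env import PettingZooEnv

    # Wrap an AEC-style PettingZoo game; the wrapper exposes the
    # reset()/step() interface Tianshou's vector envs and collectors expect.
    env = PettingZooEnv(tictactoe_v3.env())
    obs = env.reset()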

View File

@@ -12,7 +12,7 @@ from tianshou.env.worker import (
 from tianshou.utils import RunningMeanStd
 
 
-class BaseVectorEnv(gym.Env):
+class BaseVectorEnv(object):
     """Base class for vectorized environments wrapper.
 
     Usage:
@@ -196,6 +196,7 @@ class BaseVectorEnv(gym.Env):
             assert i in self.ready_id, \
                 f"Can only interact with ready environments {self.ready_id}."
 
+    # TODO: compatible issue with reset -> (obs, info)
    def reset(
        self, id: Optional[Union[int, List[int], np.ndarray]] = None
    ) -> np.ndarray:
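The added TODO refers to the reset API change in recent gym releases, where reset() can return an (obs, info) tuple instead of a bare observation. A hedged sketch of the kind of shim that compatibility work would involve; unpack_reset is a hypothetical helper, not Tianshou code:

    from typing import Tuple, Union

    import numpy as np

    def unpack_reset(result: Union[np.ndarray, Tuple[np.ndarray, dict]]) -> np.ndarray:
        """Return only the observation, whether reset() gave `obs` or `(obs, info)`."""
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            obs, _info = result
            return obs
        return result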

View File

@@ -31,9 +31,9 @@ class DummyEnvWorker(EnvWorker):
     def send(self, action: Optional[np.ndarray]) -> None:
         if action is None:
-            self.result = self.env.reset()
+            self.result = self.env.reset()  # type: ignore
         else:
-            self.result = self.env.step(action)
+            self.result = self.env.step(action)  # type: ignore
 
     def seed(self, seed: Optional[int] = None) -> List[int]:
         super().seed(seed)

View File

@@ -53,7 +53,7 @@ def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
         assert isinstance(space.spaces, tuple)
         return tuple([_setup_buf(t) for t in space.spaces])
     else:
-        return ShArray(space.dtype, space.shape)
+        return ShArray(space.dtype, space.shape)  # type: ignore


def _worker(

View File

@@ -122,9 +122,8 @@ class SACPolicy(DDPGPolicy):
         # You can check out the original SAC paper (arXiv 1801.01290): Eq 21.
         # in appendix C to get some understanding of this equation.
         if self.action_scaling and self.action_space is not None:
-            action_scale = to_torch_as(
-                (self.action_space.high - self.action_space.low) / 2.0, act
-            )
+            low, high = self.action_space.low, self.action_space.high  # type: ignore
+            action_scale = to_torch_as((high - low) / 2.0, act)
         else:
             action_scale = 1.0  # type: ignore
         squashed_action = torch.tanh(act)
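The refactored lines compute the same quantity as before: half the width of the action interval, used to map a tanh-squashed sample onto the environment's bounds, together with the log-probability correction described in Eq. 21 / appendix C of the SAC paper (arXiv 1801.01290). A standalone sketch of that mapping, with made-up bounds and no Tianshou classes:

    import torch

    # Hypothetical action bounds and a raw (unbounded) sample from the policy.
    low = torch.tensor([-2.0, 0.0])
    high = torch.tensor([2.0, 1.0])
    act = torch.randn(2)

    action_scale = (high - low) / 2.0
    action_bias = (high + low) / 2.0

    # tanh squashes to (-1, 1); scale and bias map that onto [low, high].
    squashed = torch.tanh(act)
    env_action = squashed * action_scale + action_bias

    # Change-of-variables correction for the log-probability of the squashed
    # action (the "+ eps" guards against log(0) at the interval edges).
    eps = torch.finfo(act.dtype).eps
    log_prob_correction = torch.log(action_scale * (1 - squashed.pow(2)) + eps).sum()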