fixed typo in rainbow DQN paper reference (#569)

* fixed typo in rainbow DQN paper ref

* fix gym==0.23 ci failure

Co-authored-by: Jiayi Weng <trinkle23897@gmail.com>
Andrea Boscolo Camiletto 2022-03-16 14:38:51 +01:00 committed by GitHub
parent 39f8391cfb
commit 2336a7db1b
8 changed files with 15 additions and 14 deletions

View File

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020 Tianshou contributors
+Copyright (c) 2022 Tianshou contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

View File

@@ -13,7 +13,7 @@ Welcome to Tianshou!
 * :class:`~tianshou.policy.DQNPolicy` `Double DQN <https://arxiv.org/pdf/1509.06461.pdf>`_
 * :class:`~tianshou.policy.DQNPolicy` `Dueling DQN <https://arxiv.org/pdf/1511.06581.pdf>`_
 * :class:`~tianshou.policy.C51Policy` `Categorical DQN <https://arxiv.org/pdf/1707.06887.pdf>`_
-* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1707.02298.pdf>`_
+* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1710.02298.pdf>`_
 * :class:`~tianshou.policy.QRDQNPolicy` `Quantile Regression DQN <https://arxiv.org/pdf/1710.10044.pdf>`_
 * :class:`~tianshou.policy.IQNPolicy` `Implicit Quantile Network <https://arxiv.org/pdf/1806.06923.pdf>`_
 * :class:`~tianshou.policy.FQFPolicy` `Fully-parameterized Quantile Function <https://arxiv.org/pdf/1911.02140.pdf>`_

View File

@@ -64,14 +64,15 @@ class Collector(object):
         super().__init__()
         if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
             warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
-            env = DummyVectorEnv([lambda: env])
-        self.env = env
-        self.env_num = len(env)
+            self.env = DummyVectorEnv([lambda: env])  # type: ignore
+        else:
+            self.env = env  # type: ignore
+        self.env_num = len(self.env)
         self.exploration_noise = exploration_noise
         self._assign_buffer(buffer)
         self.policy = policy
         self.preprocess_fn = preprocess_fn
-        self._action_space = env.action_space
+        self._action_space = self.env.action_space
         # avoid creating attribute outside __init__
         self.reset(False)
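The new branch above makes the auto-wrapping explicit: a bare gym.Env handed to Collector is wrapped in a single-worker DummyVectorEnv, while anything that already behaves like a vector env is stored as-is. A minimal sketch of how this looks from the caller's side; the environment name, worker count, buffer sizes, and the `policy` object are illustrative placeholders, not part of this diff:

    import gym

    from tianshou.data import Collector, VectorReplayBuffer
    from tianshou.env import DummyVectorEnv

    # Passing a bare gym.Env: Collector emits the warning above and wraps it
    # in a DummyVectorEnv of size 1 behind the scenes.
    single_env = gym.make("CartPole-v1")
    # collector = Collector(policy, single_env)  # `policy` defined elsewhere

    # Passing an explicit vector env of 4 workers skips the warning, and
    # env_num / action_space then come straight from the wrapper.
    vec_env = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(4)])
    # collector = Collector(policy, vec_env, VectorReplayBuffer(20000, 4))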

View File

@@ -6,7 +6,7 @@ from pettingzoo.utils.env import AECEnv
 from pettingzoo.utils.wrappers import BaseWrapper
 
 
-class PettingZooEnv(AECEnv, gym.Env, ABC):
+class PettingZooEnv(AECEnv, ABC):
     """The interface for petting zoo environments.
 
     Multi-agent environments must be wrapped as
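Apart from the dropped gym.Env base class, the wrapper is used the same way as before. A minimal usage sketch, assuming the tic-tac-toe game from pettingzoo.classic (the game choice is illustrative, not part of this commit):

    from pettingzoo.classic import tictactoe_v3

    from tianshou.env import PettingZooEnv

    # Wrap an AEC-style PettingZoo game; the wrapper exposes the
    # reset()/step() interface Tianshou's vector envs and collectors expect.
    env = PettingZooEnv(tictactoe_v3.env())
    obs = env.reset()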

View File

@@ -12,7 +12,7 @@ from tianshou.env.worker import (
 from tianshou.utils import RunningMeanStd
 
 
-class BaseVectorEnv(gym.Env):
+class BaseVectorEnv(object):
     """Base class for vectorized environments wrapper.
 
     Usage:
@@ -196,6 +196,7 @@ class BaseVectorEnv(gym.Env):
             assert i in self.ready_id, \
                 f"Can only interact with ready environments {self.ready_id}."
 
+    # TODO: compatible issue with reset -> (obs, info)
    def reset(
        self, id: Optional[Union[int, List[int], np.ndarray]] = None
    ) -> np.ndarray:
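The added TODO refers to the reset API change in recent gym releases, where reset() can return an (obs, info) tuple instead of a bare observation. A hedged sketch of the kind of shim that compatibility work would involve; unpack_reset is a hypothetical helper, not Tianshou code:

    from typing import Tuple, Union

    import numpy as np

    def unpack_reset(result: Union[np.ndarray, Tuple[np.ndarray, dict]]) -> np.ndarray:
        """Return only the observation, whether reset() gave `obs` or `(obs, info)`."""
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            obs, _info = result
            return obs
        return result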

View File

@@ -31,9 +31,9 @@ class DummyEnvWorker(EnvWorker):
     def send(self, action: Optional[np.ndarray]) -> None:
         if action is None:
-            self.result = self.env.reset()
+            self.result = self.env.reset()  # type: ignore
         else:
-            self.result = self.env.step(action)
+            self.result = self.env.step(action)  # type: ignore
 
     def seed(self, seed: Optional[int] = None) -> List[int]:
         super().seed(seed)

View File

@@ -53,7 +53,7 @@ def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
         assert isinstance(space.spaces, tuple)
         return tuple([_setup_buf(t) for t in space.spaces])
     else:
-        return ShArray(space.dtype, space.shape)
+        return ShArray(space.dtype, space.shape)  # type: ignore


def _worker(

View File

@@ -122,9 +122,8 @@ class SACPolicy(DDPGPolicy):
         # You can check out the original SAC paper (arXiv 1801.01290): Eq 21.
         # in appendix C to get some understanding of this equation.
         if self.action_scaling and self.action_space is not None:
-            action_scale = to_torch_as(
-                (self.action_space.high - self.action_space.low) / 2.0, act
-            )
+            low, high = self.action_space.low, self.action_space.high  # type: ignore
+            action_scale = to_torch_as((high - low) / 2.0, act)
         else:
             action_scale = 1.0  # type: ignore
         squashed_action = torch.tanh(act)
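The refactored lines compute the same quantity as before: half the width of the action interval, used to map a tanh-squashed sample onto the environment's bounds, together with the log-probability correction described in Eq. 21 / appendix C of the SAC paper (arXiv 1801.01290). A standalone sketch of that mapping, with made-up bounds and no Tianshou classes:

    import torch

    # Hypothetical action bounds and a raw (unbounded) sample from the policy.
    low = torch.tensor([-2.0, 0.0])
    high = torch.tensor([2.0, 1.0])
    act = torch.randn(2)

    action_scale = (high - low) / 2.0
    action_bias = (high + low) / 2.0

    # tanh squashes to (-1, 1); scale and bias map that onto [low, high].
    squashed = torch.tanh(act)
    env_action = squashed * action_scale + action_bias

    # Change-of-variables correction for the log-probability of the squashed
    # action (the "+ eps" guards against log(0) at the interval edges).
    eps = torch.finfo(act.dtype).eps
    log_prob_correction = torch.log(action_scale * (1 - squashed.pow(2)) + eps).sum()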