fixed typo in rainbow DQN paper reference (#569)
* fixed typo in rainbow DQN paper ref
* fix gym==0.23 ci failure

Co-authored-by: Jiayi Weng <trinkle23897@gmail.com>
This commit is contained in:
parent 39f8391cfb
commit 2336a7db1b
LICENSE (2 changed lines)
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020 Tianshou contributors
+Copyright (c) 2022 Tianshou contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -13,7 +13,7 @@ Welcome to Tianshou!
 * :class:`~tianshou.policy.DQNPolicy` `Double DQN <https://arxiv.org/pdf/1509.06461.pdf>`_
 * :class:`~tianshou.policy.DQNPolicy` `Dueling DQN <https://arxiv.org/pdf/1511.06581.pdf>`_
 * :class:`~tianshou.policy.C51Policy` `Categorical DQN <https://arxiv.org/pdf/1707.06887.pdf>`_
-* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1707.02298.pdf>`_
+* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1710.02298.pdf>`_
 * :class:`~tianshou.policy.QRDQNPolicy` `Quantile Regression DQN <https://arxiv.org/pdf/1710.10044.pdf>`_
 * :class:`~tianshou.policy.IQNPolicy` `Implicit Quantile Network <https://arxiv.org/pdf/1806.06923.pdf>`_
 * :class:`~tianshou.policy.FQFPolicy` `Fully-parameterized Quantile Function <https://arxiv.org/pdf/1911.02140.pdf>`_
@@ -64,14 +64,15 @@ class Collector(object):
         super().__init__()
         if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
             warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
-            env = DummyVectorEnv([lambda: env])
-        self.env = env
-        self.env_num = len(env)
+            self.env = DummyVectorEnv([lambda: env])  # type: ignore
+        else:
+            self.env = env  # type: ignore
+        self.env_num = len(self.env)
         self.exploration_noise = exploration_noise
         self._assign_buffer(buffer)
         self.policy = policy
         self.preprocess_fn = preprocess_fn
-        self._action_space = env.action_space
+        self._action_space = self.env.action_space
         # avoid creating attribute outside __init__
         self.reset(False)
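The effect of this hunk: a bare gym.Env handed to Collector is transparently wrapped in a DummyVectorEnv, and the wrapped env (not the constructor argument) is used everywhere afterwards. Assigning to self.env instead of rebinding env also plausibly avoids a latent late-binding pitfall, since the lambda closes over the name env, which the old code rebound to the vector env. A minimal sketch of the same wrapping pattern, assuming tianshou and a gym version with CartPole-v0 registered:

import gym
from tianshou.env import DummyVectorEnv

env = gym.make("CartPole-v0")  # a single, non-vectorized environment

# Same check as the Collector: anything without __len__ is treated as a
# single env and wrapped, so downstream code sees a uniform vector API.
if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
    venv = DummyVectorEnv([lambda: env])
else:
    venv = env

print(len(venv))  # 1 -- vectorized envs define __len__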
tianshou/env/pettingzoo_env.py (2 changed lines)
@@ -6,7 +6,7 @@ from pettingzoo.utils.env import AECEnv
 from pettingzoo.utils.wrappers import BaseWrapper
 
 
-class PettingZooEnv(AECEnv, gym.Env, ABC):
+class PettingZooEnv(AECEnv, ABC):
     """The interface for petting zoo environments.
 
     Multi-agent environments must be wrapped as
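For context, PettingZooEnv adapts a turn-based (agent-environment-cycle) multi-agent environment to Tianshou; after this change it inherits only from AECEnv and ABC, not gym.Env. A hedged usage sketch, assuming pettingzoo with the classic games installed (tictactoe_v3 is just an illustrative choice):

from pettingzoo.classic import tictactoe_v3
from tianshou.env import PettingZooEnv

# Wrap an AEC game so Tianshou's multi-agent tooling can drive it
# through a single-env interface.
env = PettingZooEnv(tictactoe_v3.env())
obs = env.reset()  # observation for the agent whose turn it is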
tianshou/env/venvs.py (3 changed lines)
@@ -12,7 +12,7 @@ from tianshou.env.worker import (
 from tianshou.utils import RunningMeanStd
 
 
-class BaseVectorEnv(gym.Env):
+class BaseVectorEnv(object):
     """Base class for vectorized environments wrapper.
 
     Usage:
@@ -196,6 +196,7 @@ class BaseVectorEnv(gym.Env):
         assert i in self.ready_id, \
             f"Can only interact with ready environments {self.ready_id}."
 
+    # TODO: compatible issue with reset -> (obs, info)
     def reset(
         self, id: Optional[Union[int, List[int], np.ndarray]] = None
     ) -> np.ndarray:
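The new TODO flags the gym reset-API migration: older gym versions return only obs from reset(), while newer ones can return an (obs, info) pair. A minimal compatibility shim, offered only as a sketch of the issue (reset_compat is a hypothetical helper, not Tianshou code):

from typing import Any, Tuple

def reset_compat(env: Any) -> Tuple[Any, dict]:
    """Normalize env.reset() to the (obs, info) convention."""
    result = env.reset()
    # Heuristic: a 2-tuple whose second element is a dict is taken to be
    # the new-style (obs, info) return; everything else is old-style obs.
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result
    return result, {}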
tianshou/env/worker/dummy.py (4 changed lines)
@@ -31,9 +31,9 @@ class DummyEnvWorker(EnvWorker):
 
     def send(self, action: Optional[np.ndarray]) -> None:
         if action is None:
-            self.result = self.env.reset()
+            self.result = self.env.reset()  # type: ignore
         else:
-            self.result = self.env.step(action)
+            self.result = self.env.step(action)  # type: ignore
 
     def seed(self, seed: Optional[int] = None) -> List[int]:
         super().seed(seed)
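In the worker protocol, send() doubles as both reset and step: None means "reset the env", anything else is an action to step with, and a later recv() picks up self.result. A toy restatement of that convention (ToyWorker is hypothetical, not part of Tianshou):

from typing import Any, Optional

import numpy as np

class ToyWorker:
    """Mimics DummyEnvWorker's send() convention."""

    def __init__(self, env: Any) -> None:
        self.env = env
        self.result: Any = None

    def send(self, action: Optional[np.ndarray]) -> None:
        if action is None:            # None is the reset signal
            self.result = self.env.reset()
        else:                         # any array is an action to step with
            self.result = self.env.step(action)

    def recv(self) -> Any:
        return self.result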
tianshou/env/worker/subproc.py (2 changed lines)
@@ -53,7 +53,7 @@ def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
         assert isinstance(space.spaces, tuple)
         return tuple([_setup_buf(t) for t in space.spaces])
     else:
-        return ShArray(space.dtype, space.shape)
+        return ShArray(space.dtype, space.shape)  # type: ignore
 
 
 def _worker(
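_setup_buf walks a (possibly nested) gym space and mirrors it with shared-memory buffers: Dict spaces become dicts, Tuple spaces become tuples, and leaf spaces become a single ShArray with the space's dtype and shape. A simplified sketch of the same recursion, with a plain numpy array standing in for ShArray:

import gym
import numpy as np

def setup_buf_sketch(space: gym.Space):
    """Mirror a nested gym space with numpy buffers (ShArray stand-in)."""
    if isinstance(space, gym.spaces.Dict):
        return {k: setup_buf_sketch(s) for k, s in space.spaces.items()}
    if isinstance(space, gym.spaces.Tuple):
        return tuple(setup_buf_sketch(s) for s in space.spaces)
    # leaf space (e.g. Box): one buffer matching dtype and shape
    return np.zeros(space.shape, dtype=space.dtype)

buf = setup_buf_sketch(gym.spaces.Dict({
    "pos": gym.spaces.Box(low=-1.0, high=1.0, shape=(3,)),
    "vel": gym.spaces.Box(low=-1.0, high=1.0, shape=(3,)),
}))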
@@ -122,9 +122,8 @@ class SACPolicy(DDPGPolicy):
         # You can check out the original SAC paper (arXiv 1801.01290): Eq 21.
         # in appendix C to get some understanding of this equation.
         if self.action_scaling and self.action_space is not None:
-            action_scale = to_torch_as(
-                (self.action_space.high - self.action_space.low) / 2.0, act
-            )
+            low, high = self.action_space.low, self.action_space.high  # type: ignore
+            action_scale = to_torch_as((high - low) / 2.0, act)
         else:
             action_scale = 1.0  # type: ignore
         squashed_action = torch.tanh(act)
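The refactor leaves the math unchanged: an unbounded pre-activation act is squashed by tanh into (-1, 1), then rescaled by action_scale = (high - low) / 2 to land in the action bounds. A small numeric sketch of that mapping (the explicit bias term below is for illustration with asymmetric bounds; this hunk only shows the scale):

import torch

low, high = -2.0, 2.0                 # illustrative action bounds
act = torch.tensor([0.5, -1.3])       # unbounded pre-squash values

action_scale = (high - low) / 2.0     # 2.0
action_bias = (high + low) / 2.0      # 0.0 for symmetric bounds
squashed = torch.tanh(act)            # in (-1, 1)
scaled = action_scale * squashed + action_bias  # in (low, high)
print(scaled)  # approximately tensor([ 0.9242, -1.7234])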