From 513573ea8278d93a144ffa734719fd60721f9d70 Mon Sep 17 00:00:00 2001
From: Trinkle23897 <463003665@qq.com>
Date: Mon, 8 Jun 2020 22:20:52 +0800
Subject: [PATCH] add link

---
 README.md                     |  7 ++++---
 docs/index.rst                |  7 ++++---
 docs/tutorials/cheatsheet.rst |  4 +++-
 tianshou/env/vecenv.py        | 14 +++++++-------
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 8dc783d..61b3bcc 100644
--- a/README.md
+++ b/README.md
@@ -33,9 +33,10 @@ Here is Tianshou's other features:

 - Elegant framework, using only ~2000 lines of code
-- Support parallel environment sampling for all algorithms
-- Support recurrent state representation in actor network and critic network (RNN-style training for POMDP)
-- Support any type of environment state (e.g. a dict, a self-defined class, ...)
+- Support parallel environment sampling for all algorithms [Usage](https://tianshou.readthedocs.io/en/latest/tutorials/cheatsheet.html#parallel-sampling)
+- Support recurrent state representation in actor network and critic network (RNN-style training for POMDP) [Usage](https://tianshou.readthedocs.io/en/latest/tutorials/cheatsheet.html#rnn-style-training)
+- Support any type of environment state (e.g. a dict, a self-defined class, ...) [Usage](https://tianshou.readthedocs.io/en/latest/tutorials/cheatsheet.html#user-defined-environment-and-different-state-representation)
+- Support customized training process [Usage](https://tianshou.readthedocs.io/en/latest/tutorials/cheatsheet.html#customize-training-process)
 - Support n-step returns estimation for all Q-learning based algorithms

 In Chinese, Tianshou means divinely ordained and is derived to the gift of being born with. Tianshou is a reinforcement learning platform, and the RL algorithm does not learn from humans. So taking "Tianshou" means that there is no teacher to study with, but rather to learn by themselves through constant interaction with the environment.
diff --git a/docs/index.rst b/docs/index.rst
index c71f9eb..ab4a8a1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -23,9 +23,10 @@ Welcome to Tianshou!
 Here is Tianshou's other features:

 * Elegant framework, using only ~2000 lines of code
-* Support parallel environment sampling for all algorithms
-* Support recurrent state representation in actor network and critic network (RNN-style training for POMDP)
-* Support any type of environment state (e.g. a dict, a self-defined class, ...)
+* Support parallel environment sampling for all algorithms: :ref:`parallel_sampling`
+* Support recurrent state representation in actor network and critic network (RNN-style training for POMDP): :ref:`rnn_training`
+* Support any type of environment state (e.g. a dict, a self-defined class, ...): :ref:`self_defined_env`
+* Support customized training process: :ref:`customize_training`
 * Support n-step returns estimation :meth:`~tianshou.policy.BasePolicy.compute_nstep_return` for all Q-learning based algorithms

 中文文档位于 https://tianshou.readthedocs.io/zh/latest/
diff --git a/docs/tutorials/cheatsheet.rst b/docs/tutorials/cheatsheet.rst
index 61e1352..178fb32 100644
--- a/docs/tutorials/cheatsheet.rst
+++ b/docs/tutorials/cheatsheet.rst
@@ -19,13 +19,15 @@ Build New Policy

 See :class:`~tianshou.policy.BasePolicy`.

-.. _parallel_sampling:
+.. _customize_training:

 Customize Training Process
 --------------------------

 See :ref:`customized_trainer`.

+.. _parallel_sampling:
+
 Parallel Sampling
 -----------------

diff --git a/tianshou/env/vecenv.py b/tianshou/env/vecenv.py
index 59993f2..2ca7431 100644
--- a/tianshou/env/vecenv.py
+++ b/tianshou/env/vecenv.py
@@ -97,15 +97,15 @@ class BaseVectorEnv(ABC, gym.Env):
         pass

     @abstractmethod
-    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> None:
+    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> List[int]:
         """Set the seed for all environments.

         Accept ``None``, an int (which will extend ``i`` to
         ``[i, i + 1, i + 2, ...]``) or a list.

-        :return: The list of seeds used in this env's random number generators.
-        The first value in the list should be the "main" seed, or the value
-        which a reproducer should pass to 'seed'.
+        :return: The list of seeds used in this env's random number \
+        generators. The first value in the list should be the "main" seed, or \
+        the value which a reproducer should pass to "seed".
         """
         pass

@@ -162,7 +162,7 @@ class VectorEnv(BaseVectorEnv):
         self._info = np.stack(self._info)
         return self._obs, self._rew, self._done, self._info

-    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> None:
+    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> List[int]:
         if np.isscalar(seed):
             seed = [seed + _ for _ in range(self.env_num)]
         elif seed is None:
@@ -269,7 +269,7 @@ class SubprocVectorEnv(BaseVectorEnv):
             self._obs[i] = self.parent_remote[i].recv()
         return self._obs

-    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> None:
+    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> List[int]:
         if np.isscalar(seed):
             seed = [seed + _ for _ in range(self.env_num)]
         elif seed is None:
@@ -347,7 +347,7 @@ class RayVectorEnv(BaseVectorEnv):
             self._obs[i] = ray.get(result_obj[_])
         return self._obs

-    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> None:
+    def seed(self, seed: Optional[Union[int, List[int]]] = None) -> List[int]:
         if not hasattr(self.envs[0], 'seed'):
             return
         if np.isscalar(seed):
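
Usage sketch (not part of the patch above): the snippet below is one plausible way to exercise the parallel sampling and the seed() return value touched by this change. It assumes gym with the CartPole-v0 environment is installed and uses an arbitrary worker count of four; after this patch, seed() is expected to return the list of seeds actually used instead of None.

    import gym
    import numpy as np

    from tianshou.env import SubprocVectorEnv

    if __name__ == '__main__':
        # four CartPole workers, each running in its own subprocess
        env_num = 4
        envs = SubprocVectorEnv(
            [lambda: gym.make('CartPole-v0') for _ in range(env_num)])

        # a scalar seed i is broadcast to [i, i + 1, i + 2, ...], one per
        # worker; with this patch the seeds in use are also returned
        seeds = envs.seed(0)
        print(seeds)

        # reset all workers and step them with a batch of actions
        obs = envs.reset()
        obs, rew, done, info = envs.step(np.zeros(env_num, dtype=int))
        envs.close()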