From 6df56161f52c0fae9ad163ac825c8e52ccfde974 Mon Sep 17 00:00:00 2001 From: carlocagnetta Date: Thu, 9 Nov 2023 13:36:23 +0100 Subject: [PATCH] Move notebooks to doc and resolve spellcheck --- .github/workflows/notebooks.yml | 2 +- .gitignore | 2 +- .pre-commit-config.yaml | 4 +- .../notebooks}/L0_overview.ipynb | 12 ++-- {notebooks => docs/notebooks}/L1_Batch.ipynb | 20 +++---- {notebooks => docs/notebooks}/L2_Buffer.ipynb | 20 +++---- .../L3_Vectorized__Environment.ipynb | 18 +++--- {notebooks => docs/notebooks}/L4_Policy.ipynb | 52 +++++++++--------- .../notebooks}/L5_Collector.ipynb | 16 +++--- .../notebooks}/L6_Trainer.ipynb | 14 ++--- .../notebooks}/L7_Experiment.ipynb | 4 +- {notebooks => docs/notebooks}/_config.yml | 0 .../_static/images/tianshou-logo.png | Bin {notebooks => docs/notebooks}/_toc.yml | 0 {notebooks => docs/notebooks}/intro.md | 0 docs/spelling_wordlist.txt | 7 +++ 16 files changed, 89 insertions(+), 82 deletions(-) rename {notebooks => docs/notebooks}/L0_overview.ipynb (93%) rename {notebooks => docs/notebooks}/L1_Batch.ipynb (94%) rename {notebooks => docs/notebooks}/L2_Buffer.ipynb (96%) rename {notebooks => docs/notebooks}/L3_Vectorized__Environment.ipynb (93%) rename {notebooks => docs/notebooks}/L4_Policy.ipynb (98%) rename {notebooks => docs/notebooks}/L5_Collector.ipynb (99%) rename {notebooks => docs/notebooks}/L6_Trainer.ipynb (99%) rename {notebooks => docs/notebooks}/L7_Experiment.ipynb (99%) rename {notebooks => docs/notebooks}/_config.yml (100%) rename {notebooks => docs/notebooks}/_static/images/tianshou-logo.png (100%) rename {notebooks => docs/notebooks}/_toc.yml (100%) rename {notebooks => docs/notebooks}/intro.md (100%) diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index 7c65213..69cd7bf 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -41,4 +41,4 @@ jobs: uses: peaceiris/actions-gh-pages@v3.6.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} - 
publish_dir: ./notebooks/_build/html \ No newline at end of file + publish_dir: ./docs/notebooks/_build/html \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3130a95..030dfc4 100644 --- a/.gitignore +++ b/.gitignore @@ -157,4 +157,4 @@ videos/ /temp /temp*.py -notebooks/_build/ +docs/notebooks/_build/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f2afd7f..ae387d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,8 +28,8 @@ repos: pass_filenames: false - id: poetry-lock-check name: poetry lock check - entry: poetry lock - args: [--check] + entry: poetry check + args: [--lock] language: system pass_filenames: false - id: mypy diff --git a/notebooks/L0_overview.ipynb b/docs/notebooks/L0_overview.ipynb similarity index 93% rename from notebooks/L0_overview.ipynb rename to docs/notebooks/L0_overview.ipynb index 336269d..f9d51e7 100644 --- a/notebooks/L0_overview.ipynb +++ b/docs/notebooks/L0_overview.ipynb @@ -22,7 +22,7 @@ }, "source": [ "## Run the code\n", - "Before we get started, we must first install Tianshou's library and Gym environment by running the commands below. Here I choose a specific version of Tianshou(0.4.8) which is the latest as of the time writing this toturial. APIs in differet versions may vary a little bit but most are the same. Feel free to use other versions in your own project." + "Before we get started, we must first install Tianshou's library and Gym environment by running the commands below. Here I choose a specific version of Tianshou(0.4.8) which is the latest as of the time writing this tutorial. APIs in different versions may vary a little bit but most are the same. Feel free to use other versions in your own project." 
] }, { @@ -171,7 +171,7 @@ "In Tianshou, all of these main components are factored out as different building blocks, which you\n", "can use to create your own algorithm and finish your own experiment.\n", "\n", - "Buiding blocks may include:\n", + "Building blocks may include:\n", "- Batch\n", "- Replay Buffer\n", "- Vectorized Environment Wrapper\n", @@ -181,7 +181,7 @@ "- Logger\n", "\n", "\n", - "Check this [webpage](https://tianshou.readthedocs.io/en/master/tutorials/dqn.html) to find jupter-notebook-style tutorials that will guide you through all these\n", + "Check this [webpage](https://tianshou.readthedocs.io/en/master/tutorials/dqn.html) to find jupyter-notebook-style tutorials that will guide you through all these\n", "modules one by one. You can also read the [documentation](https://tianshou.readthedocs.io/en/master/) of Tianshou for more detailed explanation and\n", "advanced usages." ] @@ -192,7 +192,7 @@ "id": "S0mNKwH9i6Ek" }, "source": [ - "# Further reading" + "## Further reading" ] }, { @@ -201,9 +201,9 @@ "id": "M3NPSUnAov4L" }, "source": [ - "## What if I am not familar with the PPO algorithm itself?\n", + "### What if I am not familiar with the PPO algorithm itself?\n", "As for the DRL algorithms themselves, we will refer you to the [Spinning up documentation](https://spinningup.openai.com/en/latest/algorithms/ppo.html), where they provide\n", - "plenty of resources and guides if you want to study the DRL algorithms. In Tianshou's toturials, we will\n", + "plenty of resources and guides if you want to study the DRL algorithms. In Tianshou's tutorials, we will\n", "focus on the usages of different modules, but not the algorithms themselves." 
] } diff --git a/notebooks/L1_Batch.ipynb b/docs/notebooks/L1_Batch.ipynb similarity index 94% rename from notebooks/L1_Batch.ipynb rename to docs/notebooks/L1_Batch.ipynb index ced0305..0b4aacb 100644 --- a/notebooks/L1_Batch.ipynb +++ b/docs/notebooks/L1_Batch.ipynb @@ -56,7 +56,7 @@ "A batch is simply a dictionary which stores all passed in data as key-value pairs, and automatically turns the value into a numpy array if possible.\n", "\n", "## Why we need Batch in Tianshou?\n", - "The motivation behind the implementation of Batch module is simple. In DRL, you need to handle a lot of dictionary-format data. For instance most algorithms would reuqire you to store state, action, and reward data for every step when interacting with the environment. All these data can be organised as a dictionary and a Batch module helps Tianshou unify the interface of a diverse set of algorithms. Plus, Batch supports advanced indexing, concantenation and splitting, formatting print just like any other numpy array, which may be very helpful for developers.\n", + "The motivation behind the implementation of Batch module is simple. In DRL, you need to handle a lot of dictionary-format data. For instance most algorithms would require you to store state, action, and reward data for every step when interacting with the environment. All these data can be organized as a dictionary and a Batch module helps Tianshou unify the interface of a diverse set of algorithms. Plus, Batch supports advanced indexing, concatenation and splitting, formatting print just like any other numpy array, which may be very helpful for developers.\n", "
\n", "\n", "\n", @@ -71,7 +71,7 @@ "id": "_Xenx64M9HhV" }, "source": [ - "# Basic Usages" + "## Basic Usages" ] }, { @@ -80,7 +80,7 @@ "id": "4YGX_f1Z9Uil" }, "source": [ - "## Initialisation\n", + "### Initialization\n", "Batch can be converted directly from a python dictionary, and all data structure will be converted to numpy array if possible." ] }, @@ -101,7 +101,7 @@ "batch1 = Batch({\"a\": [4, 4], \"b\": (5, 5)})\n", "print(batch1)\n", "\n", - "# initialisation of batch2 is equivalent to batch1\n", + "# initialization of batch2 is equivalent to batch1\n", "print(\"========================================\")\n", "batch2 = Batch(a=[4, 4], b=(5, 5))\n", "print(batch2)\n", @@ -137,7 +137,7 @@ "id": "JCf6bqY3uf5L" }, "source": [ - "## Getting access to data\n", + "### Getting access to data\n", "You can conveniently search or change the key-value pair in the Batch just as if it is a python dictionary." ] }, @@ -178,7 +178,7 @@ "id": "bVywStbV9jD2" }, "source": [ - "## Indexing and Slicing\n", + "### Indexing and Slicing\n", "If all values in Batch share the same shape in certain dimensions, Batch can support advanced indexing and slicing just like a normal numpy array." ] }, @@ -222,7 +222,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Aggregation and Splitting\n" + "### Aggregation and Splitting\n" ] }, { @@ -278,7 +278,7 @@ "id": "Smc_W1Cx6zRS" }, "source": [ - "## Data type converting\n", + "### Data type converting\n", "Besides numpy array, Batch actually also supports Torch Tensor. The usages are exactly the same. Cool, isn't it?" ] }, @@ -359,7 +359,7 @@ "id": "-vPMiPZ-9kJN" }, "source": [ - "# Further Reading" + "## Further Reading" ] }, { @@ -368,7 +368,7 @@ "id": "8Oc1p8ud9kcu" }, "source": [ - "Would like to learn more advanced usages of Batch? Feel curious about how data is organised inside the Batch? 
Check the [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.data.html) and other [tutorials](https://tianshou.readthedocs.io/en/master/tutorials/batch.html#) for more details." + "Would like to learn more advanced usages of Batch? Feel curious about how data is organized inside the Batch? Check the [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.data.html) and other [tutorials](https://tianshou.readthedocs.io/en/master/tutorials/batch.html#) for more details." ] } ], diff --git a/notebooks/L2_Buffer.ipynb b/docs/notebooks/L2_Buffer.ipynb similarity index 96% rename from notebooks/L2_Buffer.ipynb rename to docs/notebooks/L2_Buffer.ipynb index f9a1f63..6947463 100644 --- a/notebooks/L2_Buffer.ipynb +++ b/docs/notebooks/L2_Buffer.ipynb @@ -18,7 +18,7 @@ "id": "OdesCAxANehZ" }, "source": [ - "# Usages" + "## Usages" ] }, { @@ -27,7 +27,7 @@ "id": "fUbLl9T_SrTR" }, "source": [ - "## Basic usages as a batch\n", + "### Basic usages as a batch\n", "Usually a buffer stores all the data in a batch with circular-queue style." ] }, @@ -154,7 +154,7 @@ "id": "Eqezp0OyXn6J" }, "source": [ - "## Understanding reserved keys for buffer\n", + "### Understanding reserved keys for buffer\n", "As I have explained, ReplayBuffer is specially designed to utilize the implementations of DRL algorithms. So, for convenience, we reserve certain nine reserved keys in Batch.\n", "\n", "* `obs`\n", @@ -182,8 +182,8 @@ "id": "ueAbTspsc6jo" }, "source": [ - "## Data sampling\n", - "We keep a replay buffer in DRL for one purpose:\"sample data from it for training\". `ReplayBuffer.sample()` and `ReplayBuffer.split(..., shuffle=True)` can both fullfill this need." + "### Data sampling\n", + "We keep a replay buffer in DRL for one purpose:\"sample data from it for training\". `ReplayBuffer.sample()` and `ReplayBuffer.split(..., shuffle=True)` can both fulfill this need." 
] }, { @@ -358,7 +358,7 @@ "id": "4Wlb57V4lQyQ" }, "source": [ - "Using `ReplayBuffer.prev()`, we know that the earliest step of that episode is step \"3\". Similarly, `ReplayBuffer.next()` helps us indentify the last index of an episode regardless of which kind of ReplayBuffer we are using." + "Using `ReplayBuffer.prev()`, we know that the earliest step of that episode is step \"3\". Similarly, `ReplayBuffer.next()` helps us identify the last index of an episode regardless of which kind of ReplayBuffer we are using." ] }, { @@ -416,8 +416,8 @@ "id": "FEyE0c7tNfwa" }, "source": [ - "# Further Reading\n", - "## Other Buffer Module\n", + "## Further Reading\n", + "### Other Buffer Module\n", "\n", "* PrioritizedReplayBuffer, which helps you implement [prioritized experience replay](https://arxiv.org/abs/1511.05952)\n", "* CachedReplayBuffer, one main buffer with several cached buffers (higher sample efficiency in some scenarios)\n", @@ -425,8 +425,8 @@ "\n", "Check the documentation and the source code for more details.\n", "\n", - "## Support for steps stacking to use RNN in DRL.\n", - "There is an option called `stack_num` (default to 1) when initialising the ReplayBuffer, which may help you use RNN in your algorithm. Check the documentation for details." + "### Support for steps stacking to use RNN in DRL.\n", + "There is an option called `stack_num` (default to 1) when initializing the ReplayBuffer, which may help you use RNN in your algorithm. Check the documentation for details." 
] } ], diff --git a/notebooks/L3_Vectorized__Environment.ipynb b/docs/notebooks/L3_Vectorized__Environment.ipynb similarity index 93% rename from notebooks/L3_Vectorized__Environment.ipynb rename to docs/notebooks/L3_Vectorized__Environment.ipynb index c152785..5421248 100644 --- a/notebooks/L3_Vectorized__Environment.ipynb +++ b/docs/notebooks/L3_Vectorized__Environment.ipynb @@ -99,7 +99,7 @@ "id": "S1b6vxp9nEUS" }, "source": [ - "You may notice that the speed doesn't increase linearly when we add subprocess numbers. There are multiple reasons behind this. One reason is that synchronize exection causes straggler effect. One way to solve this would be to use asynchronous mode. We leave this for further reading if you feel interested.\n", + "You may notice that the speed doesn't increase linearly when we add subprocess numbers. There are multiple reasons behind this. One reason is that synchronized execution causes straggler effect. One way to solve this would be to use asynchronous mode. We leave this for further reading if you feel interested.\n", "\n", "Note that SubprocVectorEnv should only be used when the environment exection is slow. In practice, DummyVectorEnv (or raw Gym environment) is actually more efficient for a simple environment like CartPole because now you avoid both straggler effect and the overhead of communication between subprocesses." ] @@ -110,9 +110,9 @@ "id": "Z6yPxdqFp18j" }, "source": [ - "# Usages\n", - "## Initialisation\n", - "Just pass in a list of functions which return the initialised environment upon called." + "## Usages\n", + "### Initialization\n", + "Just pass in a list of functions which return the initialized environment upon called." ] }, { @@ -144,7 +144,7 @@ "id": "X7p8csjdrwIN" }, "source": [ - "## EnvPool supporting\n", + "### EnvPool supporting\n", "Besides integrated environment wrappers, Tianshou also fully supports [EnvPool](https://github.com/sail-sg/envpool/). Explore its Github page yourself." 
] }, @@ -154,7 +154,7 @@ "id": "kvIfqh0vqAR5" }, "source": [ - "## Environment exection and resetting\n", + "### Environment execution and resetting\n", "The only difference between Vectorized environments and standard Gym environments is that passed in actions and returned rewards/observations are also vectorized." ] }, @@ -222,8 +222,8 @@ "id": "fekHR1a6X_HB" }, "source": [ - "# Further Reading\n", - "## Other environment wrappers in Tianshou\n", + "## Further Reading\n", + "### Other environment wrappers in Tianshou\n", "\n", "\n", "* ShmemVectorEnv: use share memory instead of pipe based on SubprocVectorEnv;\n", @@ -231,7 +231,7 @@ "\n", "Check the [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.env.html) for details.\n", "\n", - "## Difference between synchronous and asynchronous mode (How to choose?)\n", + "### Difference between synchronous and asynchronous mode (How to choose?)\n", "Explanation can be found at the [Parallel Sampling](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#parallel-sampling) tutorial." ] } diff --git a/notebooks/L4_Policy.ipynb b/docs/notebooks/L4_Policy.ipynb similarity index 98% rename from notebooks/L4_Policy.ipynb rename to docs/notebooks/L4_Policy.ipynb index ddb099e..5312179 100644 --- a/notebooks/L4_Policy.ipynb +++ b/docs/notebooks/L4_Policy.ipynb @@ -7,7 +7,7 @@ }, "source": [ "# Policy\n", - "In reinforcement learning, the agent interacts with environments to improve itself. In this tutorial we will concentrate on the agent part. In Tianshou, both the agent and the core DRL algorithm are implementated in the Policy module. Tianshou provides more than 20 Policy modules, each representing one DRL algorithm. See supported algorithms [here](https://github.com/thu-ml/tianshou).\n", + "In reinforcement learning, the agent interacts with environments to improve itself. In this tutorial we will concentrate on the agent part. 
In Tianshou, both the agent and the core DRL algorithm are implemented in the Policy module. Tianshou provides more than 20 Policy modules, each representing one DRL algorithm. See supported algorithms [here](https://github.com/thu-ml/tianshou).\n", "\n", "
\n", "\n", @@ -24,7 +24,7 @@ "id": "ZqdHYdoJJS51" }, "source": [ - "# Creating your own Policy\n", + "## Creating your own Policy\n", "We will use the simple REINFORCE algorithm Policy to show the implementation of a Policy Module. The Policy we implement here will be a highly scaled-down version of [PGPolicy](https://github.com/thu-ml/tianshou/blob/master/tianshou/policy/modelfree/pg.py) in Tianshou." ] }, @@ -34,7 +34,7 @@ "id": "PWFBgZ4TJkfz" }, "source": [ - "## Initialisation\n", + "### Initialization\n", "Firstly we create the `REINFORCEPolicy` by inheriting from `BasePolicy` in Tianshou." ] }, @@ -149,7 +149,7 @@ "id": "tjtqjt8WRY5e" }, "source": [ - "## Policy.forward()\n", + "### Policy.forward()\n", "According to the equation of REINFORCE algorithm in Spinning Up's [documentation](https://spinningup.openai.com/en/latest/algorithms/vpg.html), we need to map the observation to an action distribution in action space using neural network (`self.actor`).\n", "\n", "
\n", @@ -183,7 +183,7 @@ "id": "CultfOeuTx2V" }, "source": [ - "## Policy.process_fn()\n", + "### Policy.process_fn()\n", "Now that we have defined our actor, if given training data we can set up a loss function and optimize our neural network. However, before that, we must first calculate episodic returns for every step in our training data to construct the REINFORCE loss function.\n", "\n", "Calculating episodic return is not hard, given `ReplayBuffer.next()` allows us to access every reward to go in an episode. A more convenient way would be to simply use the built-in method `BasePolicy.compute_episodic_return()` inherited from BasePolicy.\n" @@ -219,8 +219,8 @@ "id": "7UsdzNaOXPpC" }, "source": [ - "## Policy.learn()\n", - "Data batch returned by `Policy.process_fn()` will flow into `Policy.learn()`. Finall we can construct our loss function and perform the back-propagation." + "### Policy.learn()\n", + "Data batch returned by `Policy.process_fn()` will flow into `Policy.learn()`. Finally we can construct our loss function and perform the back-propagation." ] }, { @@ -322,10 +322,10 @@ "id": "xlPAbh0lKti8" }, "source": [ - "# Use the policy\n", + "## Use the policy\n", "Note that `BasePolicy` itself inherits from `torch.nn.Module`. As a result, you can consider all Policy modules as a Torch Module. They share similar APIs.\n", "\n", - "Firstly we will initialise a new REINFORCE policy." + "Firstly we will initialize a new REINFORCE policy." ] }, { @@ -380,7 +380,7 @@ "id": "-RCrsttYgAG-" }, "source": [ - "## Making decision\n", + "### Making decision\n", "Given a batch of observations, the policy can return a batch of actions and other data." ] }, @@ -407,7 +407,7 @@ "id": "swikhnuDfKep" }, "source": [ - "## Save and Load models\n", + "### Save and Load models\n", "Naturally, Tianshou Policy can be saved and loaded like a normal Torch Network." 
] }, @@ -429,7 +429,7 @@ "id": "gp8PzOYsg5z-" }, "source": [ - "## Algorithm Updating\n", + "### Algorithm Updating\n", "We have to collect some data and save them in the ReplayBuffer before updating our agent(policy). Typically we use collector to collect data, but we leave this part till later when we have learned the Collector in Tianshou. For now we generate some **fake** data." ] }, @@ -439,7 +439,7 @@ "id": "XrrPxOUAYShR" }, "source": [ - "### Generating fake data\n", + "#### Generating fake data\n", "Firstly, we need to \"pretend\" that we are using the \"Policy\" to collect data. We plan to collect 10 data so that we can update our algorithm." ] }, @@ -516,7 +516,7 @@ "id": "pkxq4gu9bGkt" }, "source": [ - "Now we are pretending to collect the second episode. At step 7 the second episode still does't end, but we are unwilling to wait, so we stop collecting to update the algorithm." + "Now we are pretending to collect the second episode. At step 7 the second episode still doesn't end, but we are unwilling to wait, so we stop collecting to update the algorithm." ] }, { @@ -579,7 +579,7 @@ "id": "55VWhWpkdfEb" }, "source": [ - "### Updates\n", + "#### Updates\n", "Now we have got a replay buffer with 10 data steps in it. We can call `Policy.update()` to train." ] }, @@ -616,7 +616,7 @@ "id": "QJ5krjrcbuiA" }, "source": [ - "# Further Reading\n", + "## Further Reading\n", "\n", "\n" ] @@ -627,8 +627,8 @@ "id": "pmWi3HuXWcV8" }, "source": [ - "## Pre-defined Networks\n", - "Tianshou provides numberous pre-defined networks usually used in DRL so that you don't have to bother yourself. Check this [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.utils.html#pre-defined-networks) for details." + "### Pre-defined Networks\n", + "Tianshou provides numerous pre-defined networks usually used in DRL so that you don't have to bother yourself. 
Check this [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.utils.html#pre-defined-networks) for details." ] }, { @@ -637,7 +637,7 @@ "id": "UPVl5LBEWJ0t" }, "source": [ - "## How to compute GAE on your own?\n", + "### How to compute GAE on your own?\n", "(Note that for this reading you need to understand the calculation of [GAE](https://arxiv.org/abs/1506.02438) advantage first)\n", "\n", "In terms of code implementation, perhaps the most difficult and annoying part is computing GAE advantage. Just now, we use the `self.compute_episodic_return()` method inherited from `BasePolicy` to save us from all those troubles. However, it is still important that we know the details behind this.\n", @@ -671,13 +671,13 @@ "id": "h_5Dt6XwQLXV" }, "source": [ - "In the code above, we sample all the 10 data in the buffer and try to compute the GAE advantage. As we know, we need to estimate the value function of every observation to compute GAE advantage. so the passed in `v_s` is the value of bacth.obs, `v_s_` is the value of bacth.obs_next this is usually computed by:\n", + "In the code above, we sample all the 10 data in the buffer and try to compute the GAE advantage. As we know, we need to estimate the value function of every observation to compute GAE advantage. so the passed in `v_s` is the value of batch.obs, `v_s_` is the value of batch.obs_next this is usually computed by:\n", "\n", - "`v_s = critic(bacth.obs)`,\n", + "`v_s = critic(batch.obs)`,\n", "\n", - "`v_s_ = critic(bacth.obs_next)`,\n", + "`v_s_ = critic(batch.obs_next)`,\n", "\n", - "where uboth `v_s` and `v_s_` are 10 dimensional arrays and `critic` is usually a neural network.\n", + "where both `v_s` and `v_s_` are 10 dimensional arrays and `critic` is usually a neural network.\n", "\n", "After we've got all those values, GAE can be computed following the equation below." 
] @@ -705,7 +705,7 @@ "id": "eV6XZaouU7EV" }, "source": [ - "But, if you do follow this equation I refered from the paper. You probably will get a slightly lower performance than you expected. There are at least 3 \"bugs\" in this equation." + "But if you do follow this equation, which I took from the paper, you will probably get a slightly lower performance than you expected. There are at least 3 \"bugs\" in this equation." ] }, { @@ -729,7 +729,7 @@ }, "outputs": [], "source": [ - "# Assume v_s_ is got by calling critic(bacth.obs_next)\n", + "# Assume v_s_ is got by calling critic(batch.obs_next)\n", "v_s_ = np.ones(10)\n", "v_s_ *= ~batch.done\n", "print(v_s_)" @@ -743,7 +743,7 @@ "source": [ "After the fix above, we will perhaps get a more accurate estimate.\n", "\n", - "**Secondly**, you must know when to stop bootstrapping. Usually we stop bootstrapping when we meet a `done` flag. However, in the buffer above, the last (10th) step is not marked by done=True, because the collecting has not finished. We must know all those unfinished steps so that we know when to stop bootstraping.\n", + "**Secondly**, you must know when to stop bootstrapping. Usually we stop bootstrapping when we meet a `done` flag. However, in the buffer above, the last (10th) step is not marked by done=True, because the collecting has not finished. We must know all those unfinished steps so that we know when to stop bootstrapping.\n", "\n", "Luckily, this can be done under the assistance of buffer because buffers in Tianshou not only store data, but also help you manage data trajectories." 
] diff --git a/notebooks/L5_Collector.ipynb b/docs/notebooks/L5_Collector.ipynb similarity index 99% rename from notebooks/L5_Collector.ipynb rename to docs/notebooks/L5_Collector.ipynb index 1766e3b..51e9f26 100644 --- a/notebooks/L5_Collector.ipynb +++ b/docs/notebooks/L5_Collector.ipynb @@ -7,7 +7,7 @@ }, "source": [ "# Collector\n", - "From its literal meaning, we can easily know that the Collector in Tianshou is used to collect training data. More specificly, the Collector controls the interaction between Policy (agent) and the environment. It also helps save the interaction data into the ReplayBuffer and returns episode statistics.\n", + "From its literal meaning, we can easily know that the Collector in Tianshou is used to collect training data. More specifically, the Collector controls the interaction between Policy (agent) and the environment. It also helps save the interaction data into the ReplayBuffer and returns episode statistics.\n", "\n", "
\n", "\n", @@ -21,7 +21,7 @@ "id": "OX5cayLv4Ziu" }, "source": [ - "# Usages\n", + "## Usages\n", "Collector can be used both for training (data collecting) and evaluation in Tianshou." ] }, @@ -31,10 +31,10 @@ "id": "Z6XKbj28u8Ze" }, "source": [ - "## Policy evaluation\n", + "### Policy evaluation\n", "We need to evaluate our trained policy from time to time in DRL experiments. Collector can help us with this.\n", "\n", - "First we have to initialise a Collector with an (vectorized) environment and a given policy (agent)." + "First we have to initialize a Collector with an (vectorized) environment and a given policy (agent)." ] }, { @@ -99,7 +99,7 @@ "id": "wmt8vuwpzQdR" }, "source": [ - "Now we would like to collect 9 episodes of data to test how our initialised Policy performs." + "Now we would like to collect 9 episodes of data to test how our initialized Policy performs." ] }, { @@ -157,7 +157,7 @@ "id": "sKQRTiG10ljU" }, "source": [ - "Seems that an initialised policy performs even worse than a random policy without any training." + "Seems that an initialized policy performs even worse than a random policy without any training." ] }, { @@ -166,7 +166,7 @@ "id": "8RKmHIoG1A1k" }, "source": [ - "## Data Collecting\n", + "### Data Collecting\n", "Data collecting is mostly used during training, when we need to store the collected data in a ReplayBuffer." ] }, @@ -255,7 +255,7 @@ "id": "8NP7lOBU3-VS" }, "source": [ - "# Further Reading\n", + "## Further Reading\n", "The above collector actually collects 52 data at a time because 52 % 4 = 0. There is one asynchronous collector which allows you collect exactly 50 steps. Check the [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.data.html#asynccollector) for details." 
] } diff --git a/notebooks/L6_Trainer.ipynb b/docs/notebooks/L6_Trainer.ipynb similarity index 99% rename from notebooks/L6_Trainer.ipynb rename to docs/notebooks/L6_Trainer.ipynb index df6f3d5..038d033 100644 --- a/notebooks/L6_Trainer.ipynb +++ b/docs/notebooks/L6_Trainer.ipynb @@ -19,7 +19,7 @@ "id": "ifsEQMzZ6mmz" }, "source": [ - "# Usages\n", + "## Usages\n", "In Tianshou v0.5.1, there are three types of Trainer. They are designed to be used in on-policy training, off-policy training and offline training respectively. We will use on-policy trainer as an example and leave the other two for further reading." ] }, @@ -29,7 +29,7 @@ "id": "XfsuU2AAE52C" }, "source": [ - "## Pseudocode\n", + "### Pseudocode\n", "![1.PNG]()\n", "\n", "For the on-policy trainer, the main difference is that we clear the buffer after Line 10." @@ -41,7 +41,7 @@ "id": "Hcp_o0CCFz12" }, "source": [ - "## Training without trainer\n", + "### Training without trainer\n", "As we have learned the usages of the Collector and the Policy, it's possible that we write our own training logic.\n", "\n", "First, let us create the instances of Environment, ReplayBuffer, Policy and Collector." @@ -163,7 +163,7 @@ "id": "p-7U_cwgF5Ej" }, "source": [ - "## Training with trainer\n", + "### Training with trainer\n", "The trainer does almost the same thing. The only difference is that it has considered many details and is more modular." ] }, @@ -205,11 +205,11 @@ "id": "_j3aUJZQ7nml" }, "source": [ - "# Further Reading\n", - "## Logger usages\n", + "## Further Reading\n", + "### Logger usages\n", "Tianshou provides experiment loggers that are both tensorboard- and wandb-compatible. It also has a BaseLogger Class which allows you to self-define your own logger. 
Check the [documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.utils.html#tianshou.utils.BaseLogger) for details.\n", "\n", - "## Learn more about the APIs of Trainers\n", + "### Learn more about the APIs of Trainers\n", "[documentation](https://tianshou.readthedocs.io/en/master/api/tianshou.trainer.html)" ] } diff --git a/notebooks/L7_Experiment.ipynb b/docs/notebooks/L7_Experiment.ipynb similarity index 99% rename from notebooks/L7_Experiment.ipynb rename to docs/notebooks/L7_Experiment.ipynb index 85a8bec..f16a147 100644 --- a/notebooks/L7_Experiment.ipynb +++ b/docs/notebooks/L7_Experiment.ipynb @@ -16,7 +16,7 @@ "id": "2QRbCJvDHNAd" }, "source": [ - "# Experiment\n", + "## Experiment\n", "To conduct this experiment, we need the following building blocks.\n", "\n", "\n", @@ -120,7 +120,7 @@ }, "source": [ "## Policy\n", - "Next we need to initialise our PPO policy. PPO is an actor-critic-style on-policy algorithm, so we have to define the actor and the critic in PPO first.\n", + "Next we need to initialize our PPO policy. PPO is an actor-critic-style on-policy algorithm, so we have to define the actor and the critic in PPO first.\n", "\n", "The actor is a neural network that shares the same network head with the critic. Both networks' input is the environment observation. 
The output of the actor is the action and the output of the critic is a single value, representing the value of the current policy.\n", "\n", diff --git a/notebooks/_config.yml b/docs/notebooks/_config.yml similarity index 100% rename from notebooks/_config.yml rename to docs/notebooks/_config.yml diff --git a/notebooks/_static/images/tianshou-logo.png b/docs/notebooks/_static/images/tianshou-logo.png similarity index 100% rename from notebooks/_static/images/tianshou-logo.png rename to docs/notebooks/_static/images/tianshou-logo.png diff --git a/notebooks/_toc.yml b/docs/notebooks/_toc.yml similarity index 100% rename from notebooks/_toc.yml rename to docs/notebooks/_toc.yml diff --git a/notebooks/intro.md b/docs/notebooks/intro.md similarity index 100% rename from notebooks/intro.md rename to docs/notebooks/intro.md diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 782d8cd..e3b9581 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -189,3 +189,10 @@ backpropagation dataclass superset picklable +ShmemVectorEnv +Github +wandb +jupyter +img +src +parallelized