From b7df31f2a7a0d7783c89de0aedd3b162dbcdad8b Mon Sep 17 00:00:00 2001
From: Carlo Cagnetta <50958143+carlocagnetta@users.noreply.github.com>
Date: Thu, 14 Dec 2023 19:31:53 +0100
Subject: [PATCH] Docs/fix trainer fct notebooks (#1009)

This PR resolves #1008
---
 docs/01_tutorials/00_dqn.rst          | 18 +++++++++---------
 docs/01_tutorials/01_concepts.rst     |  4 ++--
 docs/01_tutorials/07_cheatsheet.rst   |  2 +-
 docs/02_notebooks/L6_Trainer.ipynb    | 17 +++++++++++++++--
 docs/02_notebooks/L7_Experiment.ipynb | 14 ++++++++------
 5 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/docs/01_tutorials/00_dqn.rst b/docs/01_tutorials/00_dqn.rst
index 79d0985..263ee37 100644
--- a/docs/01_tutorials/00_dqn.rst
+++ b/docs/01_tutorials/00_dqn.rst
@@ -41,11 +41,11 @@ First of all, you have to make an environment for your agent to interact with. Y
     import gymnasium as gym
     import tianshou as ts
 
-    env = gym.make('CartPole-v0')
+    env = gym.make('CartPole-v1')
 
-CartPole-v0 includes a cart carrying a pole moving on a track. This is a simple environment with a discrete action space, for which DQN applies. You have to identify whether the action space is continuous or discrete and apply eligible algorithms. DDPG :cite:`DDPG`, for example, could only be applied to continuous action spaces, while almost all other policy gradient methods could be applied to both.
+CartPole-v1 consists of a cart, carrying a pole, that moves along a track. This is a simple environment with a discrete action space, for which DQN applies. You have to identify whether the action space is continuous or discrete and choose a suitable algorithm accordingly. DDPG :cite:`DDPG`, for example, can only be applied to continuous action spaces, while almost all other policy gradient methods can be applied to both.
 
-Here is the detail of useful fields of CartPole-v0:
+Here are the details of the useful fields of CartPole-v1:
 
 - ``state``: the position of the cart, the velocity of the cart, the angle of the pole and the velocity of the tip of the pole;
 - ``action``: can only be one of ``[0, 1, 2]``, for moving the cart left, no move, and right;
@@ -62,8 +62,8 @@ Setup Vectorized Environment
 If you want to use the original ``gym.Env``:
 ::
 
-    train_envs = gym.make('CartPole-v0')
-    test_envs = gym.make('CartPole-v0')
+    train_envs = gym.make('CartPole-v1')
+    test_envs = gym.make('CartPole-v1')
 
 Tianshou supports vectorized environment for all algorithms. It provides four types of vectorized environment wrapper:
 
@@ -74,8 +74,8 @@ Tianshou supports vectorized environment for all algorithms. It provides four ty
 
 ::
 
-    train_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v0') for _ in range(10)])
-    test_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v0') for _ in range(100)])
+    train_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v1') for _ in range(10)])
+    test_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v1') for _ in range(100)])
 
 Here, we set up 10 environments in ``train_envs`` and 100 environments in ``test_envs``.
 
@@ -84,8 +84,8 @@ You can also try the super-fast vectorized environment `EnvPool <https://github.
 ::
 
     import envpool
-    train_envs = envpool.make_gymnasium("CartPole-v0", num_envs=10)
-    test_envs = envpool.make_gymnasium("CartPole-v0", num_envs=100)
+    train_envs = envpool.make_gymnasium("CartPole-v1", num_envs=10)
+    test_envs = envpool.make_gymnasium("CartPole-v1", num_envs=100)
 
 For the demonstration, here we use the second code-block.
 
diff --git a/docs/01_tutorials/01_concepts.rst b/docs/01_tutorials/01_concepts.rst
index 5486098..ceef2e8 100644
--- a/docs/01_tutorials/01_concepts.rst
+++ b/docs/01_tutorials/01_concepts.rst
@@ -353,7 +353,7 @@ The general explanation is listed in :ref:`pseudocode`. Other usages of collecto
 ::
 
     policy = PGPolicy(...)  # or other policies if you wish
-    env = gym.make("CartPole-v0")
+    env = gym.make("CartPole-v1")
 
     replay_buffer = ReplayBuffer(size=10000)
 
@@ -363,7 +363,7 @@ The general explanation is listed in :ref:`pseudocode`. Other usages of collecto
     # the collector supports vectorized environments as well
     vec_buffer = VectorReplayBuffer(total_size=10000, buffer_num=3)
     # buffer_num should be equal to (suggested) or larger than #envs
-    envs = DummyVectorEnv([lambda: gym.make("CartPole-v0") for _ in range(3)])
+    envs = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(3)])
     collector = Collector(policy, envs, buffer=vec_buffer)
 
     # collect 3 episodes
diff --git a/docs/01_tutorials/07_cheatsheet.rst b/docs/01_tutorials/07_cheatsheet.rst
index 1d2c69a..51fece1 100644
--- a/docs/01_tutorials/07_cheatsheet.rst
+++ b/docs/01_tutorials/07_cheatsheet.rst
@@ -159,7 +159,7 @@ toy_text and classic_control environments. For more information, please refer to
     # install envpool: pip3 install envpool
 
     import envpool
-    envs = envpool.make_gymnasium("CartPole-v0", num_envs=10)
+    envs = envpool.make_gymnasium("CartPole-v1", num_envs=10)
     collector = Collector(policy, envs, buffer)
 
 Here are some other `examples <https://github.com/sail-sg/envpool/tree/master/examples/tianshou_examples>`_.
diff --git a/docs/02_notebooks/L6_Trainer.ipynb b/docs/02_notebooks/L6_Trainer.ipynb
index 308e35a..db6b0fb 100644
--- a/docs/02_notebooks/L6_Trainer.ipynb
+++ b/docs/02_notebooks/L6_Trainer.ipynb
@@ -180,7 +180,10 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "vcvw9J8RNtFE",
-    "outputId": "b483fa8b-2a57-4051-a3d0-6d8162d948c5"
+    "outputId": "b483fa8b-2a57-4051-a3d0-6d8162d948c5",
+    "tags": [
+     "remove-output"
+    ]
    },
    "outputs": [],
    "source": [
@@ -200,7 +203,17 @@
     "    episode_per_test=10,\n",
     "    step_per_collect=2000,\n",
     "    batch_size=512,\n",
-    ")\n",
+    ").run()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
     "print(result)"
    ]
   },
diff --git a/docs/02_notebooks/L7_Experiment.ipynb b/docs/02_notebooks/L7_Experiment.ipynb
index 86df486..55a3be1 100644
--- a/docs/02_notebooks/L7_Experiment.ipynb
+++ b/docs/02_notebooks/L7_Experiment.ipynb
@@ -59,9 +59,6 @@
    "metadata": {
     "editable": true,
     "id": "ao9gWJDiHgG-",
-    "slideshow": {
-     "slide_type": ""
-    },
     "tags": [
      "hide-cell",
      "remove-output"
@@ -233,8 +230,12 @@
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
+    "editable": true,
     "id": "i45EDnpxQ8gu",
-    "outputId": "b1666b88-0bfa-4340-868e-58611872d988"
+    "outputId": "b1666b88-0bfa-4340-868e-58611872d988",
+    "tags": [
+     "remove-output"
+    ]
    },
    "outputs": [],
    "source": [
@@ -249,7 +250,7 @@
     "    batch_size=256,\n",
     "    step_per_collect=2000,\n",
     "    stop_fn=lambda mean_reward: mean_reward >= 195,\n",
-    ")"
+    ").run()"
    ]
   },
   {
@@ -270,7 +271,8 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "tJCPgmiyiaaX",
-    "outputId": "40123ae3-3365-4782-9563-46c43812f10f"
+    "outputId": "40123ae3-3365-4782-9563-46c43812f10f",
+    "tags": []
    },
    "outputs": [],
    "source": [