parent
ea48cc2989
commit
b7df31f2a7
@ -41,11 +41,11 @@ First of all, you have to make an environment for your agent to interact with. Y
|
|||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
import tianshou as ts
|
import tianshou as ts
|
||||||
|
|
||||||
env = gym.make('CartPole-v0')
|
env = gym.make('CartPole-v1')
|
||||||
|
|
||||||
CartPole-v0 includes a cart carrying a pole moving on a track. This is a simple environment with a discrete action space, for which DQN applies. You have to identify whether the action space is continuous or discrete and apply eligible algorithms. DDPG :cite:`DDPG`, for example, could only be applied to continuous action spaces, while almost all other policy gradient methods could be applied to both.
|
CartPole-v1 includes a cart carrying a pole moving on a track. This is a simple environment with a discrete action space, for which DQN applies. You have to identify whether the action space is continuous or discrete and apply eligible algorithms. DDPG :cite:`DDPG`, for example, could only be applied to continuous action spaces, while almost all other policy gradient methods could be applied to both.
|
||||||
|
|
||||||
Here is the detail of useful fields of CartPole-v0:
|
Here is the detail of useful fields of CartPole-v1:
|
||||||
|
|
||||||
- ``state``: the position of the cart, the velocity of the cart, the angle of the pole and the velocity of the tip of the pole;
|
- ``state``: the position of the cart, the velocity of the cart, the angle of the pole and the velocity of the tip of the pole;
|
||||||
- ``action``: can only be one of ``[0, 1, 2]``, for moving the cart left, no move, and right;
|
- ``action``: can only be one of ``[0, 1, 2]``, for moving the cart left, no move, and right;
|
||||||
@ -62,8 +62,8 @@ Setup Vectorized Environment
|
|||||||
If you want to use the original ``gym.Env``:
|
If you want to use the original ``gym.Env``:
|
||||||
::
|
::
|
||||||
|
|
||||||
train_envs = gym.make('CartPole-v0')
|
train_envs = gym.make('CartPole-v1')
|
||||||
test_envs = gym.make('CartPole-v0')
|
test_envs = gym.make('CartPole-v1')
|
||||||
|
|
||||||
Tianshou supports vectorized environment for all algorithms. It provides four types of vectorized environment wrapper:
|
Tianshou supports vectorized environment for all algorithms. It provides four types of vectorized environment wrapper:
|
||||||
|
|
||||||
@ -74,8 +74,8 @@ Tianshou supports vectorized environment for all algorithms. It provides four ty
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
train_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v0') for _ in range(10)])
|
train_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v1') for _ in range(10)])
|
||||||
test_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v0') for _ in range(100)])
|
test_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v1') for _ in range(100)])
|
||||||
|
|
||||||
Here, we set up 10 environments in ``train_envs`` and 100 environments in ``test_envs``.
|
Here, we set up 10 environments in ``train_envs`` and 100 environments in ``test_envs``.
|
||||||
|
|
||||||
@ -84,8 +84,8 @@ You can also try the super-fast vectorized environment `EnvPool <https://github.
|
|||||||
::
|
::
|
||||||
|
|
||||||
import envpool
|
import envpool
|
||||||
train_envs = envpool.make_gymnasium("CartPole-v0", num_envs=10)
|
train_envs = envpool.make_gymnasium("CartPole-v1", num_envs=10)
|
||||||
test_envs = envpool.make_gymnasium("CartPole-v0", num_envs=100)
|
test_envs = envpool.make_gymnasium("CartPole-v1", num_envs=100)
|
||||||
|
|
||||||
For the demonstration, here we use the second code-block.
|
For the demonstration, here we use the second code-block.
|
||||||
|
|
||||||
|
@ -353,7 +353,7 @@ The general explanation is listed in :ref:`pseudocode`. Other usages of collecto
|
|||||||
::
|
::
|
||||||
|
|
||||||
policy = PGPolicy(...) # or other policies if you wish
|
policy = PGPolicy(...) # or other policies if you wish
|
||||||
env = gym.make("CartPole-v0")
|
env = gym.make("CartPole-v1")
|
||||||
|
|
||||||
replay_buffer = ReplayBuffer(size=10000)
|
replay_buffer = ReplayBuffer(size=10000)
|
||||||
|
|
||||||
@ -363,7 +363,7 @@ The general explanation is listed in :ref:`pseudocode`. Other usages of collecto
|
|||||||
# the collector supports vectorized environments as well
|
# the collector supports vectorized environments as well
|
||||||
vec_buffer = VectorReplayBuffer(total_size=10000, buffer_num=3)
|
vec_buffer = VectorReplayBuffer(total_size=10000, buffer_num=3)
|
||||||
# buffer_num should be equal to (suggested) or larger than #envs
|
# buffer_num should be equal to (suggested) or larger than #envs
|
||||||
envs = DummyVectorEnv([lambda: gym.make("CartPole-v0") for _ in range(3)])
|
envs = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(3)])
|
||||||
collector = Collector(policy, envs, buffer=vec_buffer)
|
collector = Collector(policy, envs, buffer=vec_buffer)
|
||||||
|
|
||||||
# collect 3 episodes
|
# collect 3 episodes
|
||||||
|
@ -159,7 +159,7 @@ toy_text and classic_control environments. For more information, please refer to
|
|||||||
# install envpool: pip3 install envpool
|
# install envpool: pip3 install envpool
|
||||||
|
|
||||||
import envpool
|
import envpool
|
||||||
envs = envpool.make_gymnasium("CartPole-v0", num_envs=10)
|
envs = envpool.make_gymnasium("CartPole-v1", num_envs=10)
|
||||||
collector = Collector(policy, envs, buffer)
|
collector = Collector(policy, envs, buffer)
|
||||||
|
|
||||||
Here are some other `examples <https://github.com/sail-sg/envpool/tree/master/examples/tianshou_examples>`_.
|
Here are some other `examples <https://github.com/sail-sg/envpool/tree/master/examples/tianshou_examples>`_.
|
||||||
|
@ -180,7 +180,10 @@
|
|||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "vcvw9J8RNtFE",
|
"id": "vcvw9J8RNtFE",
|
||||||
"outputId": "b483fa8b-2a57-4051-a3d0-6d8162d948c5"
|
"outputId": "b483fa8b-2a57-4051-a3d0-6d8162d948c5",
|
||||||
|
"tags": [
|
||||||
|
"remove-output"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -200,7 +203,17 @@
|
|||||||
" episode_per_test=10,\n",
|
" episode_per_test=10,\n",
|
||||||
" step_per_collect=2000,\n",
|
" step_per_collect=2000,\n",
|
||||||
" batch_size=512,\n",
|
" batch_size=512,\n",
|
||||||
")\n",
|
").run()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"print(result)"
|
"print(result)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -59,9 +59,6 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
"id": "ao9gWJDiHgG-",
|
"id": "ao9gWJDiHgG-",
|
||||||
"slideshow": {
|
|
||||||
"slide_type": ""
|
|
||||||
},
|
|
||||||
"tags": [
|
"tags": [
|
||||||
"hide-cell",
|
"hide-cell",
|
||||||
"remove-output"
|
"remove-output"
|
||||||
@ -233,8 +230,12 @@
|
|||||||
"colab": {
|
"colab": {
|
||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
|
"editable": true,
|
||||||
"id": "i45EDnpxQ8gu",
|
"id": "i45EDnpxQ8gu",
|
||||||
"outputId": "b1666b88-0bfa-4340-868e-58611872d988"
|
"outputId": "b1666b88-0bfa-4340-868e-58611872d988",
|
||||||
|
"tags": [
|
||||||
|
"remove-output"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -249,7 +250,7 @@
|
|||||||
" batch_size=256,\n",
|
" batch_size=256,\n",
|
||||||
" step_per_collect=2000,\n",
|
" step_per_collect=2000,\n",
|
||||||
" stop_fn=lambda mean_reward: mean_reward >= 195,\n",
|
" stop_fn=lambda mean_reward: mean_reward >= 195,\n",
|
||||||
")"
|
").run()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -270,7 +271,8 @@
|
|||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "tJCPgmiyiaaX",
|
"id": "tJCPgmiyiaaX",
|
||||||
"outputId": "40123ae3-3365-4782-9563-46c43812f10f"
|
"outputId": "40123ae3-3365-4782-9563-46c43812f10f",
|
||||||
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user