diff --git a/examples/atari/README.md b/examples/atari/README.md index d89d9f2..d015450 100644 --- a/examples/atari/README.md +++ b/examples/atari/README.md @@ -102,10 +102,10 @@ One epoch here is equal to 100,000 env step, 100 epochs stand for 10M. | task | best reward | reward curve | parameters | | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ | -| PongNoFrameskip-v4 | 20 | ![](results/ppo/Pong_rew.png) | `python3 atari_ppo.py --task "PongNoFrameskip-v4"` | -| BreakoutNoFrameskip-v4 | 442.1 | ![](results/ppo/Breakout_rew.png) | `python3 atari_ppo.py --task "BreakoutNoFrameskip-v4"` | -| EnduroNoFrameskip-v4 | 1386.4 | ![](results/ppo/Enduro_rew.png) | `python3 atari_ppo.py --task "EnduroNoFrameskip-v4"` | -| QbertNoFrameskip-v4 | 19585 | ![](results/ppo/Qbert_rew.png) | `python3 atari_ppo.py --task "QbertNoFrameskip-v4"` | -| MsPacmanNoFrameskip-v4 | 2319 | ![](results/ppo/MsPacman_rew.png) | `python3 atari_ppo.py --task "MsPacmanNoFrameskip-v4"` | -| SeaquestNoFrameskip-v4 | 1764 | ![](results/ppo/Seaquest_rew.png) | `python3 atari_ppo.py --task "SeaquestNoFrameskip-v4"` | -| SpaceInvadersNoFrameskip-v4 | 1184 | ![](results/ppo/SpaceInvaders_rew.png) | `python3 atari_ppo.py --task "SpaceInvadersNoFrameskip-v4"` | +| PongNoFrameskip-v4 | 20.1 | ![](results/ppo/Pong_rew.png) | `python3 atari_ppo.py --task "PongNoFrameskip-v4"` | +| BreakoutNoFrameskip-v4 | 438.5 | ![](results/ppo/Breakout_rew.png) | `python3 atari_ppo.py --task "BreakoutNoFrameskip-v4"` | +| EnduroNoFrameskip-v4 | 1304.8 | ![](results/ppo/Enduro_rew.png) | `python3 atari_ppo.py --task "EnduroNoFrameskip-v4"` | +| QbertNoFrameskip-v4 | 13640 | ![](results/ppo/Qbert_rew.png) | `python3 atari_ppo.py --task "QbertNoFrameskip-v4"` | +| MsPacmanNoFrameskip-v4 | 1930 | ![](results/ppo/MsPacman_rew.png) | `python3 atari_ppo.py --task "MsPacmanNoFrameskip-v4"` | +| SeaquestNoFrameskip-v4 | 904 | ![](results/ppo/Seaquest_rew.png) | `python3 atari_ppo.py --task "SeaquestNoFrameskip-v4" --lr 2.5e-5` | +| SpaceInvadersNoFrameskip-v4 | 843 | ![](results/ppo/SpaceInvaders_rew.png) | `python3 atari_ppo.py --task "SpaceInvadersNoFrameskip-v4"` | diff --git a/examples/atari/atari_ppo.py b/examples/atari/atari_ppo.py index 668d036..b123f10 100644 --- a/examples/atari/atari_ppo.py +++ b/examples/atari/atari_ppo.py @@ -24,7 +24,7 @@ def get_args(): parser.add_argument('--seed', type=int, default=4213) parser.add_argument('--scale-obs', type=int, default=0) parser.add_argument('--buffer-size', type=int, default=100000) - parser.add_argument('--lr', type=float, default=1e-4) + parser.add_argument('--lr', type=float, default=5e-5) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--epoch', type=int, default=100) parser.add_argument('--step-per-epoch', type=int, default=100000) diff --git a/examples/atari/results/ppo/Breakout_rew.png b/examples/atari/results/ppo/Breakout_rew.png index 8625787..296bb37 100644 Binary files a/examples/atari/results/ppo/Breakout_rew.png and b/examples/atari/results/ppo/Breakout_rew.png differ diff --git a/examples/atari/results/ppo/Enduro_rew.png b/examples/atari/results/ppo/Enduro_rew.png index 50a23fa..b445ba0 100644 Binary files a/examples/atari/results/ppo/Enduro_rew.png and b/examples/atari/results/ppo/Enduro_rew.png differ diff --git a/examples/atari/results/ppo/MsPacman_rew.png b/examples/atari/results/ppo/MsPacman_rew.png index 3483655..c16089d 100644 Binary files a/examples/atari/results/ppo/MsPacman_rew.png and b/examples/atari/results/ppo/MsPacman_rew.png differ diff --git a/examples/atari/results/ppo/Pong_rew.png b/examples/atari/results/ppo/Pong_rew.png index c52fdc2..62d05b2 100644 Binary files a/examples/atari/results/ppo/Pong_rew.png and b/examples/atari/results/ppo/Pong_rew.png differ diff --git a/examples/atari/results/ppo/Qbert_rew.png b/examples/atari/results/ppo/Qbert_rew.png index 03c83dd..8db8b67 100644 Binary files a/examples/atari/results/ppo/Qbert_rew.png and b/examples/atari/results/ppo/Qbert_rew.png differ diff --git a/examples/atari/results/ppo/Seaquest_rew.png b/examples/atari/results/ppo/Seaquest_rew.png index 6750133..200a68e 100644 Binary files a/examples/atari/results/ppo/Seaquest_rew.png and b/examples/atari/results/ppo/Seaquest_rew.png differ diff --git a/examples/atari/results/ppo/SpaceInvaders_rew.png b/examples/atari/results/ppo/SpaceInvaders_rew.png index 4c090a9..93a521e 100644 Binary files a/examples/atari/results/ppo/SpaceInvaders_rew.png and b/examples/atari/results/ppo/SpaceInvaders_rew.png differ