This is the PR for the C51 algorithm: https://arxiv.org/abs/1707.06887. 1. Add C51 policy in tianshou/policy/modelfree/c51.py. 2. Add C51 net in tianshou/utils/net/discrete.py. 3. Add C51 Atari example in examples/atari/atari_c51.py. 4. Add C51 statement in tianshou/policy/__init__.py. 5. Add C51 test in test/discrete/test_c51.py. 6. Add C51 Atari results in examples/atari/results/c51/. Running `python3 atari_c51.py --task "PongNoFrameskip-v4" --batch-size 64` gives best_result: 20.50 ± 0.50 in epoch 9. Running `python3 atari_c51.py --task "BreakoutNoFrameskip-v4" --n-step 1 --epoch 40` gives best_reward: 407.400000 ± 31.155096 in epoch 39.
33 lines
1.0 KiB
Python
33 lines
1.0 KiB
Python
"""Package-level re-exports of the policy classes.

Each policy class is imported from its implementation submodule so that
callers can write ``from tianshou.policy import C51Policy`` (and likewise
for the other names) instead of spelling out the full submodule path.
"""

from tianshou.policy.base import BasePolicy
from tianshou.policy.random import RandomPolicy
from tianshou.policy.imitation.base import ImitationPolicy
from tianshou.policy.modelfree.dqn import DQNPolicy
from tianshou.policy.modelfree.c51 import C51Policy
from tianshou.policy.modelfree.pg import PGPolicy
from tianshou.policy.modelfree.a2c import A2CPolicy
from tianshou.policy.modelfree.ddpg import DDPGPolicy
from tianshou.policy.modelfree.ppo import PPOPolicy
from tianshou.policy.modelfree.td3 import TD3Policy
from tianshou.policy.modelfree.sac import SACPolicy
from tianshou.policy.modelfree.discrete_sac import DiscreteSACPolicy
from tianshou.policy.modelbase.psrl import PSRLPolicy
from tianshou.policy.multiagent.mapolicy import MultiAgentPolicyManager


# Names exported by ``from <this package> import *``. Kept as a plain list
# literal, in the same order as the imports above, so the two stay easy to
# compare when a new policy is added.
__all__ = [
    "BasePolicy",
    "RandomPolicy",
    "ImitationPolicy",
    "DQNPolicy",
    "C51Policy",
    "PGPolicy",
    "A2CPolicy",
    "DDPGPolicy",
    "PPOPolicy",
    "TD3Policy",
    "SACPolicy",
    "DiscreteSACPolicy",
    "PSRLPolicy",
    "MultiAgentPolicyManager",
]