- Implement TD3+BC for offline RL; - fix a trainer bug where the test reward was not logged because self.env_step is not set in the offline setting;
67 lines
2.3 KiB
Python
67 lines
2.3 KiB
Python
"""Policy package."""
|
|
# isort:skip_file
|
|
|
|
from tianshou.policy.base import BasePolicy
|
|
from tianshou.policy.random import RandomPolicy
|
|
from tianshou.policy.modelfree.dqn import DQNPolicy
|
|
from tianshou.policy.modelfree.bdq import BranchingDQNPolicy
|
|
from tianshou.policy.modelfree.c51 import C51Policy
|
|
from tianshou.policy.modelfree.rainbow import RainbowPolicy
|
|
from tianshou.policy.modelfree.qrdqn import QRDQNPolicy
|
|
from tianshou.policy.modelfree.iqn import IQNPolicy
|
|
from tianshou.policy.modelfree.fqf import FQFPolicy
|
|
from tianshou.policy.modelfree.pg import PGPolicy
|
|
from tianshou.policy.modelfree.a2c import A2CPolicy
|
|
from tianshou.policy.modelfree.npg import NPGPolicy
|
|
from tianshou.policy.modelfree.ddpg import DDPGPolicy
|
|
from tianshou.policy.modelfree.ppo import PPOPolicy
|
|
from tianshou.policy.modelfree.trpo import TRPOPolicy
|
|
from tianshou.policy.modelfree.td3 import TD3Policy
|
|
from tianshou.policy.modelfree.sac import SACPolicy
|
|
from tianshou.policy.modelfree.redq import REDQPolicy
|
|
from tianshou.policy.modelfree.discrete_sac import DiscreteSACPolicy
|
|
from tianshou.policy.imitation.base import ImitationPolicy
|
|
from tianshou.policy.imitation.bcq import BCQPolicy
|
|
from tianshou.policy.imitation.cql import CQLPolicy
|
|
from tianshou.policy.imitation.td3_bc import TD3BCPolicy
|
|
from tianshou.policy.imitation.discrete_bcq import DiscreteBCQPolicy
|
|
from tianshou.policy.imitation.discrete_cql import DiscreteCQLPolicy
|
|
from tianshou.policy.imitation.discrete_crr import DiscreteCRRPolicy
|
|
from tianshou.policy.imitation.gail import GAILPolicy
|
|
from tianshou.policy.modelbased.psrl import PSRLPolicy
|
|
from tianshou.policy.modelbased.icm import ICMPolicy
|
|
from tianshou.policy.multiagent.mapolicy import MultiAgentPolicyManager
|
|
|
|
# Public API of the policy package: the names re-exported by
# ``from tianshou.policy import *``. Each entry corresponds to one of the
# classes imported at the top of this module, listed in the same order.
__all__ = [
    # Base and random policies.
    "BasePolicy",
    "RandomPolicy",
    # Policies imported from ``tianshou.policy.modelfree``.
    "DQNPolicy",
    "BranchingDQNPolicy",
    "C51Policy",
    "RainbowPolicy",
    "QRDQNPolicy",
    "IQNPolicy",
    "FQFPolicy",
    "PGPolicy",
    "A2CPolicy",
    "NPGPolicy",
    "DDPGPolicy",
    "PPOPolicy",
    "TRPOPolicy",
    "TD3Policy",
    "SACPolicy",
    "REDQPolicy",
    "DiscreteSACPolicy",
    # Policies imported from ``tianshou.policy.imitation``.
    "ImitationPolicy",
    "BCQPolicy",
    "CQLPolicy",
    "TD3BCPolicy",
    "DiscreteBCQPolicy",
    "DiscreteCQLPolicy",
    "DiscreteCRRPolicy",
    "GAILPolicy",
    # Policies imported from ``tianshou.policy.modelbased``.
    "PSRLPolicy",
    "ICMPolicy",
    # Policy imported from ``tianshou.policy.multiagent``.
    "MultiAgentPolicyManager",
]
|