Tianshou/examples/discrete/discrete_dqn_hl.py

from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.env import (
    EnvFactoryRegistered,
    VectorEnvType,
)
from tianshou.highlevel.experiment import DQNExperimentBuilder, ExperimentConfig
from tianshou.highlevel.params.policy_params import DQNParams
from tianshou.highlevel.trainer import (
    EpochStopCallbackRewardThreshold,
    EpochTestCallbackDQNSetEps,
    EpochTrainCallbackDQNSetEps,
)
from tianshou.utils.logging import run_main


def main() -> None:
    """Assemble a DQN experiment for ``CartPole-v1`` and run it.

    The experiment is put together with the high-level API in four steps:
    an environment factory, an experiment configuration, a sampling
    configuration, and finally a ``DQNExperimentBuilder`` that is refined
    with DQN parameters, a default model factory and per-epoch callbacks.
    The built experiment is then executed; nothing is returned.
    """
    # Environment: registered task name, vectorized with the DUMMY venv type,
    # using distinct seeds for training and test environments.
    env_factory = EnvFactoryRegistered(
        task="CartPole-v1",
        venv_type=VectorEnvType.DUMMY,
        train_seed=0,
        test_seed=10,
    )

    # Experiment-level switches: persistence is disabled and watching is on.
    # NOTE(review): watch_render=1/35 is presumably a per-frame delay in
    # seconds -- confirm against ExperimentConfig.
    experiment_config = ExperimentConfig(
        persistence_enabled=False,
        watch=True,
        watch_render=1 / 35,
        watch_num_episodes=100,
    )

    # Sampling/training schedule settings.
    sampling_config = SamplingConfig(
        num_epochs=10,
        step_per_epoch=10000,
        batch_size=64,
        num_train_envs=10,
        num_test_envs=100,
        buffer_size=20000,
        step_per_collect=10,
        update_per_step=1 / 10,
    )

    builder = DQNExperimentBuilder(env_factory, experiment_config, sampling_config)

    # Algorithm hyperparameters.
    dqn_params = DQNParams(
        lr=1e-3,
        discount_factor=0.9,
        estimation_step=3,
        target_update_freq=320,
    )
    builder = builder.with_dqn_params(dqn_params)
    builder = builder.with_model_factory_default(hidden_sizes=(64, 64))

    # Per-epoch callbacks: eps of 0.3 for training, 0.0 for testing, and a
    # stop callback constructed with a reward threshold of 195.
    builder = builder.with_epoch_train_callback(EpochTrainCallbackDQNSetEps(0.3))
    builder = builder.with_epoch_test_callback(EpochTestCallbackDQNSetEps(0.0))
    builder = builder.with_epoch_stop_callback(EpochStopCallbackRewardThreshold(195))

    experiment = builder.build()
    experiment.run()


# Script entry point: only runs when this file is executed directly, not on
# import. `main` is handed to `run_main` (imported from tianshou.utils.logging)
# instead of being called directly.
if __name__ == "__main__":
    run_main(main)
Add high-level discrete example (CartPole) for README 2024-01-08 18:12:43 +01:00			`from tianshou.highlevel.config import SamplingConfig`
			`from tianshou.highlevel.env import (`
Refactoring, improving class name EnvFactoryGymnasium -> EnvFactoryRegistered 2024-01-16 12:22:07 +01:00			`EnvFactoryRegistered,`
Add high-level discrete example (CartPole) for README 2024-01-08 18:12:43 +01:00			`VectorEnvType,`
			`)`
			`from tianshou.highlevel.experiment import DQNExperimentBuilder, ExperimentConfig`
			`from tianshou.highlevel.params.policy_params import DQNParams`
			`from tianshou.highlevel.trainer import (`
			`EpochStopCallbackRewardThreshold,`
			`EpochTestCallbackDQNSetEps,`
			`EpochTrainCallbackDQNSetEps,`
			`)`
			`from tianshou.utils.logging import run_main`


Refactoring/mypy issues test (#1017) Improves typing in examples and tests, towards mypy passing there. Introduces the SpaceInfo utility 2024-02-06 14:24:30 +01:00			`def main() -> None:`
Add high-level discrete example (CartPole) for README 2024-01-08 18:12:43 +01:00			`experiment = (`
			`DQNExperimentBuilder(`
Feature/algo eval (#1074) # Changes ## Dependencies - New extra "eval" ## Api Extension - `Experiment` and `ExperimentConfig` now have a `name`, that can however be overridden when `Experiment.run()` is called - When building an `Experiment` from an `ExperimentConfig`, the user has the option to add info about seeds to the name. - New method in `ExperimentConfig` called `build_default_seeded_experiments` - `SamplingConfig` has an explicit training seed, `test_seed` is inferred. - New `evaluation` package for repeating the same experiment with multiple seeds and aggregating the results (important extension!). Currently in alpha state. - Loggers can now restore the logged data into python by using the new `restore_logged_data` ## Breaking Changes - `AtariEnvFactory` (in examples) now receives explicit train and test seeds - `EnvFactoryRegistered` now requires an explicit `test_seed` - `BaseLogger.prepare_dict_for_logging` is now abstract --------- Co-authored-by: Maximilian Huettenrauch <m.huettenrauch@appliedai.de> Co-authored-by: Michael Panchenko <m.panchenko@appliedai.de> Co-authored-by: Michael Panchenko <35432522+MischaPanch@users.noreply.github.com> 2024-04-21 01:25:33 +02:00			`EnvFactoryRegistered(`
			`task="CartPole-v1",`
			`venv_type=VectorEnvType.DUMMY,`
			`train_seed=0,`
			`test_seed=10,`
			`),`
Add high-level discrete example (CartPole) for README 2024-01-08 18:12:43 +01:00			`ExperimentConfig(`
			`persistence_enabled=False,`
			`watch=True,`
			`watch_render=1 / 35,`
			`watch_num_episodes=100,`
			`),`
			`SamplingConfig(`
			`num_epochs=10,`
			`step_per_epoch=10000,`
			`batch_size=64,`
			`num_train_envs=10,`
			`num_test_envs=100,`
			`buffer_size=20000,`
			`step_per_collect=10,`
			`update_per_step=1 / 10,`
			`),`
			`)`
			`.with_dqn_params(`
			`DQNParams(`
			`lr=1e-3,`
			`discount_factor=0.9,`
			`estimation_step=3,`
			`target_update_freq=320,`
			`),`
			`)`
			`.with_model_factory_default(hidden_sizes=(64, 64))`
			`.with_epoch_train_callback(EpochTrainCallbackDQNSetEps(0.3))`
			`.with_epoch_test_callback(EpochTestCallbackDQNSetEps(0.0))`
			`.with_epoch_stop_callback(EpochStopCallbackRewardThreshold(195))`
			`.build()`
			`)`
			`experiment.run()`


			`if __name__ == "__main__":`
			`run_main(main)`