# Changes

## Dependencies

- New extra "eval"

## API Extension

- `Experiment` and `ExperimentConfig` now have a `name`, which can be overridden when `Experiment.run()` is called.
- When building an `Experiment` from an `ExperimentConfig`, the user has the option to add information about seeds to the name.
- New method in `ExperimentConfig` called `build_default_seeded_experiments`.
- `SamplingConfig` has an explicit training seed; `test_seed` is inferred.
- New `evaluation` package for repeating the same experiment with multiple seeds and aggregating the results (important extension!). Currently in alpha state.
- Loggers can now restore the logged data into Python by using the new `restore_logged_data`.

## Breaking Changes

- `AtariEnvFactory` (in examples) now receives explicit train and test seeds.
- `EnvFactoryRegistered` now requires an explicit `test_seed`.
- `BaseLogger.prepare_dict_for_logging` is now abstract.

---------

Co-authored-by: Maximilian Huettenrauch <m.huettenrauch@appliedai.de>
Co-authored-by: Michael Panchenko <m.panchenko@appliedai.de>
Co-authored-by: Michael Panchenko <35432522+MischaPanch@users.noreply.github.com>
62 lines
1.8 KiB
Python
62 lines
1.8 KiB
Python
from tianshou.highlevel.config import SamplingConfig
|
|
from tianshou.highlevel.env import (
|
|
EnvFactoryRegistered,
|
|
VectorEnvType,
|
|
)
|
|
from tianshou.highlevel.experiment import DQNExperimentBuilder, ExperimentConfig
|
|
from tianshou.highlevel.params.policy_params import DQNParams
|
|
from tianshou.highlevel.trainer import (
|
|
EpochStopCallbackRewardThreshold,
|
|
EpochTestCallbackDQNSetEps,
|
|
EpochTrainCallbackDQNSetEps,
|
|
)
|
|
from tianshou.utils.logging import run_main
|
|
|
|
|
|
def main() -> None:
    """Assemble a DQN experiment on CartPole-v1 via the high-level API and run it.

    The environment factory, experiment configuration and sampling
    configuration are created first and passed (positionally, in that order)
    to ``DQNExperimentBuilder``. DQN parameters, the default model factory
    with ``hidden_sizes=(64, 64)`` and the per-epoch train/test/stop callbacks
    are then attached, the experiment is built, and ``run()`` is called on it.
    """
    # NOTE(review): `seed=0` is passed next to the explicit `train_seed` /
    # `test_seed`. It looks like a leftover from an older single-seed
    # signature; confirm that `EnvFactoryRegistered` still accepts it.
    env_factory = EnvFactoryRegistered(
        task="CartPole-v1",
        seed=0,
        venv_type=VectorEnvType.DUMMY,
        train_seed=0,
        test_seed=10,
    )

    # Persistence is switched off; the watch_* options are enabled with a
    # render value of 1/35 and 100 episodes.
    experiment_config = ExperimentConfig(
        persistence_enabled=False,
        watch=True,
        watch_render=1 / 35,
        watch_num_episodes=100,
    )

    sampling_config = SamplingConfig(
        num_epochs=10,
        step_per_epoch=10000,
        batch_size=64,
        num_train_envs=10,
        num_test_envs=100,
        buffer_size=20000,
        step_per_collect=10,
        update_per_step=1 / 10,
    )

    builder = DQNExperimentBuilder(env_factory, experiment_config, sampling_config)

    # Each `with_*` call is applied to the result of the previous one, exactly
    # as in a fluent chain.
    builder = builder.with_dqn_params(
        DQNParams(
            lr=1e-3,
            discount_factor=0.9,
            estimation_step=3,
            target_update_freq=320,
        ),
    )
    builder = builder.with_model_factory_default(hidden_sizes=(64, 64))

    # Presumably these set the exploration epsilon for training (0.3) and
    # testing (0.0), and stop training at a reward threshold of 195 —
    # verify against the callback implementations.
    builder = builder.with_epoch_train_callback(EpochTrainCallbackDQNSetEps(0.3))
    builder = builder.with_epoch_test_callback(EpochTestCallbackDQNSetEps(0.0))
    builder = builder.with_epoch_stop_callback(EpochStopCallbackRewardThreshold(195))

    experiment = builder.build()
    experiment.run()
|
|
|
|
|
|
# Script entry point: hand `main` to `run_main` instead of calling it directly.
if __name__ == "__main__":
    run_main(main)
|