diff --git a/README.md b/README.md index d82b9df..dcfc4f3 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,9 @@ Tianshou(天授) is a reinforcement learning platform. The following image illus ## TODO Search based method parallel. + Write comments. Please do not use abbreviations unless they are widely understood (e.g. "adv" can be short for advantage or adversarial; please use the full name instead). + Please name modules formally (e.g. use lowercase names with "_" separators; I think a module called "Batch" is poorly named). + YongRen: Policy Wrapper, in order of Gaussian, DQN and DDPG TongzhengRen: losses, in order of ppo, pg, DQN, DDPG with management of placeholders diff --git a/tianshou/data/Batch.py b/tianshou/data/Batch.py index 6ae251f..6b33c1b 100644 --- a/tianshou/data/Batch.py +++ b/tianshou/data/Batch.py @@ -2,6 +2,7 @@ import numpy as np import gc +# TODO: Refactor with tf.train.slice_input_producer, tf.train.Coordinator, tf.train.QueueRunner class Batch(object): """ class for batch datasets. Collect multiple states (actions, rewards, etc.) on-policy.