This commit is contained in:
rtz19970824 2017-12-15 14:24:15 +08:00
commit ea541ed559

View File

@@ -61,7 +61,7 @@ if __name__ == '__main__':
# 3. define data collection # 3. define data collection
training_data = Replay(env, q_net, advantage_estimation.qlearning_target(target_net)) # training_data = Replay(env, q_net, advantage_estimation.qlearning_target(target_net)) #
# ShihongSong: Replay(env, pi, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN # ShihongSong: Replay(env, q_net, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
# maybe a dict to manage the elements to be collected # maybe a dict to manage the elements to be collected
# 4. start training # 4. start training