diff --git a/examples/dqn_example.py b/examples/dqn_example.py
index 0a5c084..4fbe466 100644
--- a/examples/dqn_example.py
+++ b/examples/dqn_example.py
@@ -53,7 +53,7 @@ if __name__ == '__main__':
     action = tf.placeholder(dtype=tf.int32, shape=[None]) # batch of integer actions
     target = tf.placeholder(dtype=tf.float32, shape=[None]) # target value for DQN
 
-    dqn_loss = losses.dqn_loss(action, target, pi) # TongzhengRen
+    dqn_loss = losses.dqn_loss(action, target, q_net) # TongzhengRen
 
     total_loss = dqn_loss
    optimizer = tf.train.AdamOptimizer(1e-3)
@@ -61,7 +61,7 @@ if __name__ == '__main__':
 
     # 3. define data collection
     training_data = Replay(env, q_net, advantage_estimation.qlearning_target(target_net)) #
-    # ShihongSong: Replay(env, pi, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
+    # ShihongSong: Replay(env, q_net, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
     # maybe a dict to manage the elements to be collected
 
     # 4. start training
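
Context for the first hunk: the DQN loss is computed from the online Q network (q_net), not from a policy (pi), which is why the call is corrected to losses.dqn_loss(action, target, q_net). Below is a minimal sketch of what a loss of this shape typically computes, written against the same TF1 placeholders as the example. The function name dqn_loss_sketch, the q_values argument (raw network output rather than the q_net object), and n_actions are illustrative assumptions, not the repository's actual losses.dqn_loss signature.

import tensorflow as tf

def dqn_loss_sketch(action, target, q_values):
    """Squared TD error for DQN (sketch).

    action:   int32   [batch]            -- actions taken in the batch
    target:   float32 [batch]            -- e.g. r + gamma * max_a' Q_target(s', a')
    q_values: float32 [batch, n_actions] -- output of the online Q network
    """
    batch_size = tf.shape(action)[0]
    # Select Q(s, a) for the action actually taken in each transition
    indices = tf.stack([tf.range(batch_size), action], axis=1)
    q_selected = tf.gather_nd(q_values, indices)
    # Mean squared error against the (stop-gradient) target values
    return tf.reduce_mean(tf.square(target - q_selected))

In the second hunk, the same substitution applies to data collection: Replay interacts with the environment using q_net, while advantage_estimation.qlearning_target(target_net) is what would fill the target placeholder above.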