minor fix
parent 039c8140e2
commit 92deae9f8d
@@ -53,7 +53,7 @@ if __name__ == '__main__':
     action = tf.placeholder(dtype=tf.int32, shape=[None]) # batch of integer actions
     target = tf.placeholder(dtype=tf.float32, shape=[None]) # target value for DQN
 
-    dqn_loss = losses.dqn_loss(action, target, pi) # TongzhengRen
+    dqn_loss = losses.dqn_loss(action, target, q_net) # TongzhengRen
 
     total_loss = dqn_loss
     optimizer = tf.train.AdamOptimizer(1e-3)
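For context, a DQN loss with this signature typically gathers the Q-value of each taken action from the Q network's output and regresses it onto the supplied target, which is why the argument is the Q network (q_net) rather than a policy (pi). The sketch below is only an assumption about what losses.dqn_loss(action, target, q_net) computes, not the repository's actual implementation; the attribute q_net.q_values (a [batch_size, n_actions] tensor) is hypothetical and may be named differently.

import tensorflow as tf

def dqn_loss(action, target, q_net):
    # Assumed interface: q_net.q_values is a [batch_size, n_actions] tensor of Q-values.
    q_all = q_net.q_values
    batch_size = tf.shape(action)[0]
    indices = tf.stack([tf.range(batch_size), action], axis=1)
    q_taken = tf.gather_nd(q_all, indices)              # Q(s, a) for the actions in the batch
    return tf.reduce_mean(tf.square(target - q_taken))  # mean squared TD error against the target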
@@ -61,7 +61,7 @@ if __name__ == '__main__':
 
     # 3. define data collection
     training_data = Replay(env, q_net, advantage_estimation.qlearning_target(target_net)) #
-    # ShihongSong: Replay(env, pi, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
+    # ShihongSong: Replay(env, q_net, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
     # maybe a dict to manage the elements to be collected
 
     # 4. start training
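The comments in steps 3 and 4 sketch the intended workflow: the Replay collector interacts with the environment, stores transitions in a ReplayMemory, and produces minibatches whose Q-learning targets come from the target network. Below is a rough, hypothetical sketch of how step 4 could be wired on top of the names defined above; the methods collect() and next_batch(), the sync() helper on target_net, and the literal step counts are assumptions for illustration, not the repository's actual interface.

# reuses env, q_net, target_net, training_data, optimizer, total_loss from the script above
train_op = optimizer.minimize(total_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100000):
        training_data.collect()                   # hypothetical: step the env, store transitions
        feed_dict = training_data.next_batch(32)  # hypothetical: sample a minibatch and fill
                                                  # the action/target placeholders
        sess.run(train_op, feed_dict=feed_dict)
        if step % 1000 == 0:
            target_net.sync(q_net)                # hypothetical: copy q_net weights to target_net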