minor fix
This commit is contained in:
parent f496725437
commit 9ed3e7b092
@@ -53,7 +53,7 @@ if __name__ == '__main__':
     action = tf.placeholder(dtype=tf.int32, shape=[None]) # batch of integer actions
     target = tf.placeholder(dtype=tf.float32, shape=[None]) # target value for DQN
 
-    dqn_loss = losses.dqn_loss(action, target, pi) # TongzhengRen
+    dqn_loss = losses.dqn_loss(action, target, q_net) # TongzhengRen
 
     total_loss = dqn_loss
     optimizer = tf.train.AdamOptimizer(1e-3)
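The body of losses.dqn_loss is not shown in this diff, but for this signature a standard DQN loss would gather the Q-value of each taken action and regress it toward the precomputed target. Below is a minimal TF1-style sketch, assuming the network's output is a [batch, num_actions] tensor of Q-values (here passed directly as q_values; the parameter names and exact signature are assumptions, not the project's actual API):

import tensorflow as tf

def dqn_loss(action, target, q_values):
    # q_values: [batch, num_actions] network outputs; action: [batch] int32;
    # target: [batch] float32, computed externally (e.g. by qlearning_target).
    num_actions = q_values.get_shape().as_list()[-1]
    onehot = tf.one_hot(action, num_actions)            # [batch, num_actions]
    q_taken = tf.reduce_sum(q_values * onehot, axis=1)  # Q(s, a) of the taken actions
    return tf.reduce_mean(tf.square(target - q_taken))  # mean squared TD error

Keeping the target a plain placeholder, as the script above does, lets the same loss serve plain DQN or a target-network variant; only the values fed in change.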
@@ -61,7 +61,7 @@ if __name__ == '__main__':
 
     # 3. define data collection
     training_data = Replay(env, q_net, advantage_estimation.qlearning_target(target_net)) #
-    # ShihongSong: Replay(env, pi, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
+    # ShihongSong: Replay(env, q_net, advantage_estimation.qlearning_target(target_network)), use your ReplayMemory, interact as follows. Simplify your advantage_estimation.dqn to run before YongRen's DQN
     # maybe a dict to manage the elements to be collected
 
     # 4. start training
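The comment asks ShihongSong to back Replay with a ReplayMemory, whose implementation is not part of this diff. A minimal sketch of the usual structure (class and method names here are hypothetical, not the project's actual code):

import random
from collections import deque

class ReplayMemory:
    """Fixed-size FIFO store of (state, action, reward, next_state, done) transitions."""

    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        # Oldest transitions are evicted automatically once capacity is reached.
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random minibatch for the DQN update; requires batch_size <= len(self.buffer).
        return random.sample(self.buffer, batch_size)

qlearning_target(target_net) would then map sampled transitions to the values fed into the target placeholder, conventionally r + gamma * max_a' Q_target(s', a'), with no bootstrap term on terminal states.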