From 5910e08672173aaa7e68109467d3d93b4087d251 Mon Sep 17 00:00:00 2001 From: haoshengzou Date: Thu, 25 Jan 2018 10:11:36 +0800 Subject: [PATCH] data/utils.py added but not pushed... --- tianshou/core/opt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tianshou/core/opt.py b/tianshou/core/opt.py index 59e4608..51d9e1c 100644 --- a/tianshou/core/opt.py +++ b/tianshou/core/opt.py @@ -15,6 +15,7 @@ def DPG(policy, action_value): grad_ys = tf.gradients(critic_value_loss, critic_action_input) grad_policy_vars = tf.gradients(policy_action_output, trainable_variables, grad_ys=grad_ys) + # TODO: this is slightly different from the DDPG implementations in baselines, keras-rl and rllab. It uses the sampled action (with noise) rather than directly connecting the two networks grads_and_vars = zip(grad_policy_vars, trainable_variables)