From a8a12f10834da6de62207236cf372fc34d4a03e4 Mon Sep 17 00:00:00 2001
From: rtz19970824 <1289226405@qq.com>
Date: Sun, 10 Dec 2017 14:23:40 +0800
Subject: [PATCH] coding style
---
README.md | 13 +++++++------
tianshou/core/losses.py | 6 +++++-
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index d345241..fde2d48 100644
--- a/README.md
+++ b/README.md
@@ -41,15 +41,16 @@ Tianshou(天授) is a reinforcement learning platform. The following image illus
+
+## About coding style
+
+You can follow the [Google Python style guide](https://google.github.io/styleguide/pyguide.html).
+
+Files should all be named with lowercase letters and underscores.
+
## TODO
Search based method parallel.
-`Please Write comments.`
-
-`Please do not use abbreviations unless others can know it well. (e.g. adv can short for advantage/adversarial, please use the full name instead)`
-
-`Please name the module formally. (e.g. use more lower case and "_", I think a module called "Batch" is terrible)`
-
YongRen: Policy Wrapper, in order of Gaussian, DQN and DDPG
TongzhengRen: losses, in order of ppo, pg, DQN, DDPG with management of placeholders
diff --git a/tianshou/core/losses.py b/tianshou/core/losses.py
index 7ee564a..c38168f 100644
--- a/tianshou/core/losses.py
+++ b/tianshou/core/losses.py
@@ -22,4 +22,8 @@ def entropy_reg(pi):
def KL_diff(pi, pi_old):
kloldnew = pi_old.pd.kl(pi.pd)
meankl = U.mean(kloldnew)
- return meankl
\ No newline at end of file
+ return meankl
+
+
+def vanilla_policy_gradient():
+ pass