diff --git a/README.md b/README.md
index d345241..fde2d48 100644
--- a/README.md
+++ b/README.md
@@ -41,15 +41,16 @@ Tianshou(天授) is a reinforcement learning platform. The following image illus
 
+
+## About coding style
+
+You can follow the [Google Python coding style](https://google.github.io/styleguide/pyguide.html).
+
+Files should all be named with lower-case letters and underscores.
+
 ## TODO
 
 Search based method parallel.
 
-`Please Write comments.`
-
-`Please do not use abbreviations unless others can know it well. (e.g. adv can short for advantage/adversarial, please use the full name instead)`
-
-`Please name the module formally. (e.g. use more lower case and "_", I think a module called "Batch" is terrible)`
-
 YongRen: Policy Wrapper, in order of Gaussian, DQN and DDPG
 
 TongzhengRen: losses, in order of ppo, pg, DQN, DDPG with management of placeholders
 
diff --git a/tianshou/core/losses.py b/tianshou/core/losses.py
index 7ee564a..c38168f 100644
--- a/tianshou/core/losses.py
+++ b/tianshou/core/losses.py
@@ -22,4 +22,8 @@ def entropy_reg(pi):
 def KL_diff(pi, pi_old):
     kloldnew = pi_old.pd.kl(pi.pd)
     meankl = U.mean(kloldnew)
-    return meankl
\ No newline at end of file
+    return meankl
+
+
+def vanilla_policy_gradient():
+    pass