From 5f2f05a57009cde33deccf83925d6963a2225662 Mon Sep 17 00:00:00 2001 From: Trinkle23897 <463003665@qq.com> Date: Sat, 13 Jun 2020 17:06:08 +0800 Subject: [PATCH] fix #40 --- tianshou/policy/modelfree/sac.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tianshou/policy/modelfree/sac.py b/tianshou/policy/modelfree/sac.py index 9dd4b8a..2885fe1 100644 --- a/tianshou/policy/modelfree/sac.py +++ b/tianshou/policy/modelfree/sac.py @@ -101,7 +101,8 @@ class SACPolicy(DDPGPolicy): y = torch.tanh(x) act = y * self._action_scale + self._action_bias log_prob = dist.log_prob(x) - torch.log( - self._action_scale * (1 - y.pow(2)) + self.__eps) + self._action_scale * (1 - y.pow(2)) + self.__eps + ).sum(-1, keepdim=True) act = act.clamp(self._range[0], self._range[1]) return Batch( logits=logits, act=act, state=h, dist=dist, log_prob=log_prob)