From 74f430ea366b547d1fb9fa6e6bad373aac28be87 Mon Sep 17 00:00:00 2001 From: Alex Nikulkov Date: Tue, 8 Mar 2022 14:38:42 -0800 Subject: [PATCH] Add a comment before SAC alpha loss (#565) Co-authored-by: Jiayi Weng --- tianshou/data/collector.py | 2 +- tianshou/policy/modelfree/sac.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tianshou/data/collector.py b/tianshou/data/collector.py index 0066825..213b49d 100644 --- a/tianshou/data/collector.py +++ b/tianshou/data/collector.py @@ -364,6 +364,7 @@ class AsyncCollector(Collector): exploration_noise: bool = False, ) -> None: # assert env.is_async + warnings.warn("Using async setting may collect extra transitions into buffer.") super().__init__(policy, env, buffer, preprocess_fn, exploration_noise) def reset_env(self) -> None: @@ -424,7 +425,6 @@ class AsyncCollector(Collector): "Please specify at least one (either n_step or n_episode) " "in AsyncCollector.collect()." ) - warnings.warn("Using async setting may collect extra transitions into buffer.") ready_env_ids = self._ready_env_ids diff --git a/tianshou/policy/modelfree/sac.py b/tianshou/policy/modelfree/sac.py index fc89cf3..03469dc 100644 --- a/tianshou/policy/modelfree/sac.py +++ b/tianshou/policy/modelfree/sac.py @@ -174,6 +174,7 @@ class SACPolicy(DDPGPolicy): if self._is_auto_alpha: log_prob = obs_result.log_prob.detach() + self._target_entropy + # please take a look at issue #258 if you'd like to change this line alpha_loss = -(self._log_alpha * log_prob).mean() self._alpha_optim.zero_grad() alpha_loss.backward()