diff --git a/tianshou/trainer/offpolicy.py b/tianshou/trainer/offpolicy.py index 54b92fd..eb14dfb 100644 --- a/tianshou/trainer/offpolicy.py +++ b/tianshou/trainer/offpolicy.py @@ -117,8 +117,8 @@ def offpolicy_trainer( env_step += int(result["n/st"]) t.update(result["n/st"]) logger.log_train_data(result, env_step) - last_rew = result['rew'] if 'rew' in result else last_rew - last_len = result['len'] if 'len' in result else last_len + last_rew = result['rew'] if result["n/ep"] > 0 else last_rew + last_len = result['len'] if result["n/ep"] > 0 else last_len data = { "env_step": str(env_step), "rew": f"{last_rew:.2f}", diff --git a/tianshou/trainer/onpolicy.py b/tianshou/trainer/onpolicy.py index ad1b4a6..2c539a2 100644 --- a/tianshou/trainer/onpolicy.py +++ b/tianshou/trainer/onpolicy.py @@ -125,8 +125,8 @@ def onpolicy_trainer( env_step += int(result["n/st"]) t.update(result["n/st"]) logger.log_train_data(result, env_step) - last_rew = result['rew'] if 'rew' in result else last_rew - last_len = result['len'] if 'len' in result else last_len + last_rew = result['rew'] if result["n/ep"] > 0 else last_rew + last_len = result['len'] if result["n/ep"] > 0 else last_len data = { "env_step": str(env_step), "rew": f"{last_rew:.2f}",