Fixed the mapolicy train issue (#968)

The trained MARL policies were not performing as expected because the
parent class (MultiAgentPolicyManager) needed a train function.

Fixes thu-ml/tianshou#967
This commit is contained in:
Fahmid Morshed Fahid 2023-10-16 20:52:07 -04:00 committed by GitHub
parent 66b7fc542b
commit bf7841078d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,4 +1,4 @@
from typing import Any, Literal
from typing import Any, Literal, Self
import numpy as np
@ -230,3 +230,11 @@ class MultiAgentPolicyManager(BasePolicy):
for k, v in out.items():
results[agent_id + "/" + k] = v
return results
# Override train() so that it sets every sub-policy to the requested mode.
# No separate eval() override is needed, because eval() internally calls train(False).
def train(self, mode: bool = True) -> Self:
"""Set each internal policy in training mode."""
for policy in self.policies.values():
policy.train(mode)
return self