2020-03-12 22:20:33 +08:00
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BasePolicy(ABC):
    """Abstract base class for policies.

    Subclasses must implement :meth:`act`; the class cannot be instantiated
    until they do. The remaining methods (:meth:`train`, :meth:`eval`,
    :meth:`reset`, :meth:`exploration`) are no-op hooks in this base class,
    and :meth:`process_fn` hands its batch back untouched.
    """

    def __init__(self):
        # Cooperative initialisation so the rest of the MRO is set up too.
        super().__init__()

    @abstractmethod
    def act(self, batch, hidden_state=None):
        """Choose an action for ``batch``; must be overridden.

        The base implementation does nothing and returns ``None``.
        NOTE(review): the original inline note reads
        "return {policy, action, hidden}", so implementations are presumably
        expected to return those three items -- confirm against callers.
        """

    def train(self):
        """Hook with no behaviour in the base class; returns ``None``."""
        return None

    def eval(self):
        """Hook with no behaviour in the base class; returns ``None``."""
        return None

    def reset(self):
        """Hook with no behaviour in the base class; returns ``None``."""
        return None

    @staticmethod
    def process_fn(batch, buffer, indice):
        """Return ``batch`` unchanged.

        ``buffer`` and ``indice`` are accepted but not used by this default
        implementation.
        """
        return batch

    def exploration(self):
        """Hook with no behaviour in the base class; returns ``None``."""
        return None
|