import numpy as np

class evaluator(object):
    """Base class for callable state evaluators.

    An instance keeps a reference to an environment and the number of
    available actions. Subclasses are expected to override ``__call__``;
    calling the base class directly raises ``NotImplementedError``.
    """

    # NOTE: the explicit ``object`` base is kept on purpose; subclasses in
    # this file use the two-argument ``super(...)`` form.
    def __init__(self, env, action_num):
        """Store the environment and the action count on the instance.

        Args:
            env: Environment object; kept as-is in ``self.env``.
            action_num: Number of actions; kept as-is in ``self.action_num``.
        """
        self.env, self.action_num = env, action_num

    def __call__(self, state):
        """Evaluate ``state``; must be provided by a subclass.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        message = "Need to implement the evaluator"
        raise NotImplementedError(message)

class rollout_policy(evaluator):
    """Evaluator that steps the environment with uniformly random actions.

    Starting from a given state, it repeatedly samples an action index in
    ``[0, action_num)`` and calls ``env.step_forward(state, action)`` until
    that call reports termination.
    """

    def __init__(self, env, action_num):
        """Store the environment and action count; see ``__call__`` for use.

        Args:
            env: Object providing ``step_forward(state, action)``, whose
                result is unpacked as ``(next_state, is_terminated)``.
            action_num: Number of actions; used as the exclusive upper bound
                when sampling a random action index.
        """
        super(rollout_policy, self).__init__(env, action_num)
        # Termination flag reported by the most recent rollout step.
        self.is_terminated = False

    def __call__(self, state):
        """Run a random rollout from ``state`` until termination.

        At least one step is always taken. The termination flag returned by
        the environment is stored in ``self.is_terminated``, which is the
        value the loop tests; binding it to a local name instead would leave
        the attribute ``False`` forever and the loop would never exit.

        Args:
            state: State to start the rollout from; passed to
                ``env.step_forward`` unchanged.

        Returns:
            The state returned by the ``step_forward`` call that reported
            termination.
        """
        # TODO: prior for rollout policy
        # Reset the flag so the same instance can be used for several
        # rollouts; otherwise a finished rollout would block the next one.
        self.is_terminated = False
        while not self.is_terminated:
            action = np.random.randint(0, self.action_num)
            state, self.is_terminated = self.env.step_forward(state, action)
        return state