import numpy as np


class evaluator(object):
    """Base class for callable state evaluators.

    An instance keeps a reference to an environment object and an action
    count. Calling the base class directly always raises
    ``NotImplementedError``; subclasses provide the actual ``__call__``.
    """

    def __init__(self, env, action_num):
        """Remember ``env`` and ``action_num`` on the instance."""
        self.env, self.action_num = env, action_num

    def __call__(self, state):
        """Evaluate ``state``. Not implemented on the base class.

        Raises:
            NotImplementedError: always.
        """
        message = "Need to implement the evaluator"
        raise NotImplementedError(message)


class rollout_policy(evaluator):
    """Evaluator that scores a state by playing uniformly random actions.

    Starting from the given state, actions are drawn with
    ``np.random.randint(0, action_num)`` and applied through
    ``env.simulate_step_forward`` until that call returns ``None`` as the
    next state. The rewards collected along the way are summed.
    """

    def __init__(self, env, action_num):
        """Initialise the base evaluator and clear the termination flag."""
        super(rollout_policy, self).__init__(env, action_num)
        self.is_terminated = False

    def __call__(self, state):
        """Run one random rollout from ``state``.

        At least one step is always taken, even before the first check for
        a ``None`` state.

        Returns:
            A ``(prior, total_reward)`` tuple: ``prior`` is a uniform array
            of length ``action_num`` (every entry ``1 / action_num``) and
            ``total_reward`` is the sum of the rewards returned by the
            environment during the rollout.
        """
        # TODO: prior for rollout policy
        accumulated = 0.
        while True:
            # Draw a uniformly random action index and advance the simulation.
            move = np.random.randint(0, self.action_num)
            state, gain = self.env.simulate_step_forward(state, move)
            accumulated += gain
            # A None next-state ends the rollout.
            if state is None:
                break
        uniform_prior = np.full([self.action_num], 1. / self.action_num)
        return uniform_prior, accumulated