# Tianshou/AlphaGo/reversi.py


import numpy as np
import copy
'''
Settings of the Reversi game.
Vertices are 1-based (row, column) pairs: (1, 1) is the upper-left
corner of the board and (size, 1) is the lower-left corner.
'''
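# Board encoding used throughout this file: 1 is a black stone, -1 is a
# white stone, and 0 is an empty square. Flattened actions run from 0 to
# size ** 2 - 1; the extra index size ** 2 encodes a pass, which _flatten
# maps from the special vertex (0, 0).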
class Reversi:
    def __init__(self, **kwargs):
        self.size = kwargs['size']
    def _deflatten(self, idx):
        # Flat index -> 1-based (row, column) vertex.
        x = idx // self.size + 1
        y = idx % self.size + 1
        return (x, y)
    def _flatten(self, vertex):
        # 1-based (row, column) vertex -> flat index; (0, 0) encodes a pass.
        x, y = vertex
        if (x == 0) and (y == 0):
            return self.size ** 2
        return (x - 1) * self.size + (y - 1)
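    # For example, with size=8: _flatten((1, 1)) == 0, _flatten((1, 8)) == 7,
    # _flatten((8, 8)) == 63, _flatten((0, 0)) == 64 (the pass action), and
    # _deflatten(9) == (2, 2).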
    def get_board(self):
        # Initial position: the four centre squares are occupied, with the
        # two colors on opposite diagonals. Integer division (//) is needed
        # here because float indices are invalid in Python 3.
        board = np.zeros([self.size, self.size], dtype=np.int32)
        board[self.size // 2 - 1, self.size // 2 - 1] = -1
        board[self.size // 2, self.size // 2] = -1
        board[self.size // 2 - 1, self.size // 2] = 1
        board[self.size // 2, self.size // 2 - 1] = 1
        return board
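    # Sanity check for size=8 (the standard board): the centre reads
    #     board[3:5, 3:5] == [[-1,  1],
    #                         [ 1, -1]]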
    def _find_correct_moves(self, board, color, is_next=False):
        # Return the flat indices of all legal moves for `color`; with
        # is_next=True, look for the opponent's legal moves instead.
        moves = []
        if is_next:
            new_color = 0 - color
        else:
            new_color = color
        for i in range(self.size ** 2):
            x, y = self._deflatten(i)
            valid = self._is_valid(board, x - 1, y - 1, new_color)
            if valid:
                moves.append(i)
        return moves
    def _one_direction_valid(self, board, x, y, color):
        # True iff (x, y) is on the board and holds a stone of `color`.
        if (x >= 0) and (x < self.size):
            if (y >= 0) and (y < self.size):
                if board[x, y] == color:
                    return True
        return False
    def _is_valid(self, board, x, y, color):
        # A move is legal iff the square is empty and, in at least one of
        # the eight directions, a contiguous run of opponent stones is
        # followed by a stone of our own color.
        if board[x, y]:
            return False
        for x_direction in [-1, 0, 1]:
            for y_direction in [-1, 0, 1]:
                new_x = x
                new_y = y
                flag = 0
                while True:
                    new_x += x_direction
                    new_y += y_direction
                    if self._one_direction_valid(board, new_x, new_y, 0 - color):
                        flag = 1
                    else:
                        break
                if self._one_direction_valid(board, new_x, new_y, color) and flag:
                    return True
        return False
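    # On the opening board, for example, black (color=1) may play the
    # 0-based square (2, 3): stepping downwards crosses the white stone at
    # (3, 3) and ends on the black stone at (4, 3).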
    def simulate_get_mask(self, state, action_set):
        # Return the actions that are illegal in `state`. If no stone can be
        # placed, everything except the final pass entry is masked; otherwise
        # every action outside valid_moves (including the pass) is masked.
        history_boards, color = copy.deepcopy(state)
        board = copy.deepcopy(history_boards[-1])
        valid_moves = self._find_correct_moves(board, color)
        if not len(valid_moves):
            invalid_action_mask = action_set[0:-1]
        else:
            invalid_action_mask = []
            for action in action_set:
                if action not in valid_moves:
                    invalid_action_mask.append(action)
        return invalid_action_mask
    def simulate_step_forward(self, state, action):
        # Apply `action` for the player to move. A pass (action == size ** 2)
        # either hands the turn to the opponent or, if the opponent cannot
        # move either, ends the game and returns the winner's reward from the
        # current player's point of view.
        history_boards, color = copy.deepcopy(state)
        board = copy.deepcopy(history_boards[-1])
        if action == self.size ** 2:
            valid_moves = self._find_correct_moves(board, color, is_next=True)
            if not len(valid_moves):
                winner = self._get_winner(board)
                return None, winner * color
            else:
                return [history_boards, 0 - color], 0
        new_board = self._step(board, color, action)
        history_boards.append(new_board)
        return [history_boards, 0 - color], 0
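    # A minimal usage sketch (assuming size=8): from the opening position
    # with black (1) to move,
    #     state = [[reversi.get_board()], 1]
    #     state, reward = reversi.simulate_step_forward(state, 19)
    # plays flat index 19 (vertex (3, 4)), appends the resulting board, and
    # hands the turn to white; the reward stays 0 until the game ends.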
    def simulate_hashable_conversion(self, state):
        # Reversi is Markovian, so the latest board alone is enough to hash
        # the state.
        return tuple(state[0][-1].flatten().tolist())
    def _get_winner(self, board):
        # 1 if black holds more stones, -1 if white does, 0 for a draw.
        black_num, white_num = self._number_of_black_and_white(board)
        black_win = black_num - white_num
        if black_win > 0:
            winner = 1
        elif black_win < 0:
            winner = -1
        else:
            winner = 0
        return winner
    def _number_of_black_and_white(self, board):
        black_num = 0
        white_num = 0
        board_list = np.reshape(board, self.size ** 2)
        for i in range(len(board_list)):
            if board_list[i] == 1:
                black_num += 1
            elif board_list[i] == -1:
                white_num += 1
        return black_num, white_num
    def _step(self, board, color, action):
        # The None check must come first: comparing None with an int raises
        # a TypeError in Python 3.
        if action is None:
            raise ValueError("Action is None!")
        if action < 0 or action > self.size ** 2 - 1:
            raise ValueError("Action not in the range of [0, {}]!".format(
                self.size ** 2 - 1))
        x, y = self._deflatten(action)
        new_board = self._flip(board, x - 1, y - 1, color)
        return new_board
    def _flip(self, board, x, y, color):
        # Place a stone of `color` at (x, y) and flip every enclosed run of
        # opponent stones; raise if the move flips nothing.
        valid = 0
        board[x, y] = color
        for x_direction in [-1, 0, 1]:
            for y_direction in [-1, 0, 1]:
                new_x = x
                new_y = y
                flag = 0
                while True:
                    new_x += x_direction
                    new_y += y_direction
                    if self._one_direction_valid(board, new_x, new_y, 0 - color):
                        flag = 1
                    else:
                        break
                if self._one_direction_valid(board, new_x, new_y, color) and flag:
                    valid = 1
                    flip_x = x
                    flip_y = y
                    while True:
                        flip_x += x_direction
                        flip_y += y_direction
                        if self._one_direction_valid(board, flip_x, flip_y, 0 - color):
                            board[flip_x, flip_y] = color
                        else:
                            break
        if valid:
            return board
        else:
            raise ValueError("Invalid action")
    def executor_do_move(self, history, latest_boards, board, color, vertex):
        # Execute a move given as a vertex; (0, 0) is a pass. Returns False
        # only when the opponent cannot move after a pass, i.e. the game is
        # over.
        board = np.reshape(board, (self.size, self.size))
        action = self._flatten(vertex)
        if action == self.size ** 2:
            valid_moves = self._find_correct_moves(board, color, is_next=True)
            if not len(valid_moves):
                return False
            else:
                return True
        else:
            new_board = self._step(board, color, action)
            history.append(new_board)
            latest_boards.append(new_board)
            return True
    def executor_get_score(self, board):
        winner = self._get_winner(board)
        return winner
if __name__ == "__main__":
    reversi = Reversi(size=8)
    board = reversi.get_board()
    print("initial board")
    print(board)
    # White (-1) plays flat index 20, i.e. vertex (3, 5).
    state, value = reversi.simulate_step_forward([[board], -1], 20)
    print("board after the move")
    print(state[0][-1])
    print("current score (1: black leads, -1: white leads, 0: even)")
    print(reversi.executor_get_score(state[0][-1]))
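    # A minimal random self-play sketch: both sides pick uniformly among
    # their legal moves until the game ends. It assumes the action set is
    # list(range(size ** 2 + 1)) with the pass action last, which is the
    # layout simulate_get_mask expects.
    action_set = list(range(reversi.size ** 2 + 1))
    state = [[reversi.get_board()], 1]
    value = 0
    while state is not None:
        masked = reversi.simulate_get_mask(state, action_set)
        legal = [a for a in action_set if a not in masked]
        action = int(np.random.choice(legal))
        state, value = reversi.simulate_step_forward(state, action)
    print("random playout reward for the player who passed last:", value)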