diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py
index 98e5e61..5624a2f 100644
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@@ -6,6 +6,8 @@ #
 from game import Game
+import copy
+import numpy as np
 import utils
@@ -186,7 +188,10 @@ class GTPEngine():
         return self._game.game_engine.executor_get_score(self._game.board), True

     def cmd_show_board(self, args, **kwargs):
-        return self._game.board, True
+        board = copy.deepcopy(self._game.board)
+        if isinstance(board, np.ndarray):
+            board = board.flatten().tolist()
+        return board, True

     def cmd_get_prob(self, args, **kwargs):
         return self._game.prob, True
diff --git a/AlphaGo/game.py b/AlphaGo/game.py
index 442cb73..3a7959c 100644
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@@ -26,33 +26,37 @@ class Game:
     TODO : Maybe merge with the engine class in future, currently leave it untouched for interacting with Go UI.
     '''
-    def __init__(self, name="go", role="unknown", debug=False, checkpoint_path=None):
+    def __init__(self, name="reversi", role="unknown", debug=False, checkpoint_path=None):
         self.name = name
         self.role = role
         self.debug = debug
         if self.name == "go":
             self.size = 9
             self.komi = 3.75
-            self.board = [utils.EMPTY] * (self.size ** 2)
             self.history = []
             self.history_length = 8
-            self.latest_boards = deque(maxlen=8)
-            for _ in range(8):
-                self.latest_boards.append(self.board)
             self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role)
+            self.board = [utils.EMPTY] * (self.size ** 2)
         elif self.name == "reversi":
             self.size = 8
             self.history_length = 1
-            self.game_engine = reversi.Reversi()
+            self.history = []
+            self.game_engine = reversi.Reversi(size=self.size)
             self.board = self.game_engine.get_board()
         else:
             raise ValueError(name + " is an unknown game...")
         self.evaluator = model.ResNet(self.size, self.size ** 2 + 1, history_length=self.history_length)
+        self.latest_boards = deque(maxlen=self.history_length)
+        for _ in range(self.history_length):
+            self.latest_boards.append(self.board)

     def clear(self):
-        self.board = [utils.EMPTY] * (self.size ** 2)
-        self.history = []
+        if self.name == "go":
+            self.board = [utils.EMPTY] * (self.size ** 2)
+        elif self.name == "reversi":
+            self.board = self.game_engine.get_board()
+        self.history = []
         for _ in range(self.history_length):
             self.latest_boards.append(self.board)
@@ -84,7 +88,7 @@ class Game:
         if self.name == "go":
             res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
         elif self.name == "reversi":
-            res = self.game_engine.executor_do_move(self.board, color, vertex)
+            res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
         return res

     def think_play_move(self, color):
@@ -110,13 +114,14 @@ class Game:
             if row[i] < 10:
                 print(' ', end='')
             for j in range(self.size):
-                print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ')
+                print(self.status2symbol(self.board[self.game_engine._flatten((j + 1, i + 1))]), end=' ')
             print('')
         sys.stdout.flush()

 if __name__ == "__main__":
-    g = Game()
-    g.show_board()
+    g = Game("go")
+    print(g.board)
+    g.clear()
     g.think_play_move(1)
     #file = open("debug.txt", "a")
     #file.write("mcts check\n")
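Note: cmd_show_board now has to cope with two board representations — Go keeps a flat Python list, while the patched Reversi returns a 2-D numpy array. A minimal standalone sketch of that normalization (the helper name flatten_board is illustrative, not part of this patch):

import copy
import numpy as np

def flatten_board(board):
    # Defensive copy, so a GTP client cannot mutate engine state via the result.
    board = copy.deepcopy(board)
    # Reversi's get_board() returns a (size, size) ndarray; Go already
    # stores a flat list, so only ndarrays need flattening.
    if isinstance(board, np.ndarray):
        board = board.flatten().tolist()
    return board

assert flatten_board(np.zeros((2, 2), dtype=np.int32)) == [0, 0, 0, 0]
assert flatten_board([0, 1, -1]) == [0, 1, -1]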
diff --git a/AlphaGo/go.py b/AlphaGo/go.py
index 833b01f..aca6632 100644
--- a/AlphaGo/go.py
+++ b/AlphaGo/go.py
@@ -212,12 +212,12 @@ class Go:

     def simulate_step_forward(self, state, action):
         # initialize the simulate_board from state
-        history_boards, color = state
+        history_boards, color = copy.deepcopy(state)
         if history_boards[-1] == history_boards[-2] and action is utils.PASS:
             return None, 2 * (float(self.executor_get_score(history_boards[-1]) > 0)-0.5) * color
         else:
             vertex = self._action2vertex(action)
-            new_board = self._do_move(copy.copy(history_boards[-1]), color, vertex)
+            new_board = self._do_move(copy.deepcopy(history_boards[-1]), color, vertex)
             history_boards.append(new_board)
             new_color = -color
             return [history_boards, new_color], 0
@@ -227,8 +227,8 @@ class Go:
             return False
         current_board[self._flatten(vertex)] = color
         self._process_board(current_board, color, vertex)
-        history.append(copy.copy(current_board))
-        latest_boards.append(copy.copy(current_board))
+        history.append(copy.deepcopy(current_board))
+        latest_boards.append(copy.deepcopy(current_board))
         return True

     def _find_empty(self, current_board):
diff --git a/AlphaGo/model.py b/AlphaGo/model.py
index 2a620f9..0549f41 100644
--- a/AlphaGo/model.py
+++ b/AlphaGo/model.py
@@ -173,10 +173,10 @@ class ResNet(object):
         """
         state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1])
         for i in range(self.history_length):
-            state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.board_size ** 2)).reshape(self.board_size,
+            state[0, :, :, i] = np.array(np.array(history[i]).flatten() == np.ones(self.board_size ** 2)).reshape(self.board_size,
                                                                                                          self.board_size)
             state[0, :, :, i + self.history_length] = np.array(
-                np.array(history[i]) == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
+                np.array(history[i]).flatten() == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
         # TODO: need a config to specify the BLACK and WHITE
         if color == +1:
             state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size])
@@ -301,7 +301,7 @@ class ResNet(object):
         :return:
         """
-        new_board = copy.copy(board)
+        new_board = copy.deepcopy(board)
         if new_board.ndim == 3:
             new_board = np.expand_dims(new_board, axis=0)
@@ -331,7 +331,7 @@ class ResNet(object):
         :param orientation: an integer, which orientation to reflect
         :return:
         """
-        new_board = copy.copy(board)
+        new_board = copy.deepcopy(board)
         for _ in range(times):
             if orientation == 0:
                 new_board = new_board[:, ::-1]
diff --git a/AlphaGo/play.py b/AlphaGo/play.py
index 9144a40..2731948 100644
--- a/AlphaGo/play.py
+++ b/AlphaGo/play.py
@@ -89,7 +89,7 @@ if __name__ == '__main__':
     pattern = "[A-Z]{1}[0-9]{1}"
     space = re.compile("\s+")

-    size = 9
+    size = {"go": 9, "reversi": 8}
     show = ['.', 'X', 'O']
     evaluate_rounds = 1
@@ -102,13 +102,13 @@ if __name__ == '__main__':
         pass_flag = [False, False]
         print("Start game {}".format(game_num))
         # end the game if both palyer chose to pass, or play too much turns
-        while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2:
+        while not (pass_flag[0] and pass_flag[1]) and num < size["reversi"] ** 2 * 2:
             turn = num % 2
             board = player[turn].run_cmd(str(num) + ' show_board')
             board = eval(board[board.index('['):board.index(']') + 1])
-            for i in range(size):
-                for j in range(size):
-                    print show[board[i * size + j]] + " ",
+            for i in range(size["reversi"]):
+                for j in range(size["reversi"]):
+                    print show[board[i * size["reversi"] + j]] + " ",
             print "\n",
             data.boards.append(board)
             start_time = time.time()
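Note: the model.py hunks flatten each history board before the elementwise comparison, so a 2-D Reversi board and a flat Go board yield identical binary feature planes. A self-contained sketch of the construction (to_planes is a hypothetical helper; +1/-1 encode black/white as elsewhere in this patch):

import numpy as np

def to_planes(board, board_size):
    # Accept either a flat list (Go) or a 2-D ndarray (Reversi).
    flat = np.array(board).flatten()
    black = (flat == 1).reshape(board_size, board_size)   # black-stone plane
    white = (flat == -1).reshape(board_size, board_size)  # white-stone plane
    return black.astype(np.float32), white.astype(np.float32)

black, white = to_planes(np.array([[1, -1], [0, 1]]), 2)
assert black.tolist() == [[1.0, 0.0], [0.0, 1.0]]
assert white.tolist() == [[0.0, 1.0], [0.0, 0.0]]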
diff --git a/AlphaGo/reversi.py b/AlphaGo/reversi.py
index 4fa1468..c6c8a5b 100644
--- a/AlphaGo/reversi.py
+++ b/AlphaGo/reversi.py
@@ -1,4 +1,5 @@
 import numpy as np
+import copy

 '''
 Settings of the Reversi game.
@@ -8,13 +9,8 @@


 class Reversi:
-    def __init__(self, black=None, white=None):
-        self.board = None  # 8 * 8 board with 1 for black, -1 for white and 0 for blank
-        self.color = None  # 1 for black and -1 for white
-        self.action = None  # number in 0~63
-        self.winner = None
-        self.black_win = None
-        self.size = 8
+    def __init__(self, **kwargs):
+        self.size = kwargs['size']

     def _deflatten(self, idx):
         x = idx // self.size + 1
@@ -24,39 +20,39 @@ class Reversi:
     def _flatten(self, vertex):
         x, y = vertex
         if (x == 0) and (y == 0):
-            return 64
+            return self.size ** 2
         return (x - 1) * self.size + (y - 1)

-    def get_board(self, board=None):
-        self.board = board or np.zeros([8,8])
-        self.board[3, 3] = -1
-        self.board[4, 4] = -1
-        self.board[3, 4] = 1
-        self.board[4, 3] = 1
-        return self.board
+    def get_board(self):
+        board = np.zeros([self.size, self.size], dtype=np.int32)
+        board[self.size // 2 - 1, self.size // 2 - 1] = -1
+        board[self.size // 2, self.size // 2] = -1
+        board[self.size // 2 - 1, self.size // 2] = 1
+        board[self.size // 2, self.size // 2 - 1] = 1
+        return board

-    def _find_correct_moves(self, is_next=False):
+    def _find_correct_moves(self, board, color, is_next=False):
         moves = []
         if is_next:
-            color = 0 - self.color
+            new_color = 0 - color
         else:
-            color = self.color
-        for i in range(64):
+            new_color = color
+        for i in range(self.size ** 2):
             x, y = self._deflatten(i)
-            valid = self._is_valid(x - 1, y - 1, color)
+            valid = self._is_valid(board, x - 1, y - 1, new_color)
             if valid:
                 moves.append(i)
         return moves

-    def _one_direction_valid(self, x, y, color):
+    def _one_direction_valid(self, board, x, y, color):
         if (x >= 0) and (x < self.size):
             if (y >= 0) and (y < self.size):
-                if self.board[x, y] == color:
+                if board[x, y] == color:
                     return True
         return False

-    def _is_valid(self, x, y, color):
-        if self.board[x, y]:
+    def _is_valid(self, board, x, y, color):
+        if board[x, y]:
             return False
         for x_direction in [-1, 0, 1]:
             for y_direction in [-1, 0, 1]:
@@ -66,20 +62,18 @@ class Reversi:
                 while True:
                     new_x += x_direction
                     new_y += y_direction
-                    if self._one_direction_valid(new_x, new_y, 0 - color):
+                    if self._one_direction_valid(board, new_x, new_y, 0 - color):
                         flag = 1
                     else:
                         break
-                if self._one_direction_valid(new_x, new_y, color) and flag:
+                if self._one_direction_valid(board, new_x, new_y, color) and flag:
                     return True
         return False

     def simulate_get_mask(self, state, action_set):
-        history_boards, color = state
-        self.board = np.reshape(history_boards[-1], (self.size, self.size))
-        self.color = color
-        valid_moves = self._find_correct_moves()
-        print(valid_moves)
+        history_boards, color = copy.deepcopy(state)
+        board = copy.deepcopy(history_boards[-1])
+        valid_moves = self._find_correct_moves(board, color)
         if not len(valid_moves):
             invalid_action_mask = action_set[0:-1]
         else:
@@ -90,34 +84,34 @@ class Reversi:
         return invalid_action_mask

     def simulate_step_forward(self, state, action):
-        self.board = state[0].copy()
-        self.board = np.reshape(self.board, (self.size, self.size))
-        self.color = state[1]
-        self.action = action
-        if self.action == 64:
-            valid_moves = self._find_correct_moves(is_next=True)
+        history_boards, color = copy.deepcopy(state)
+        board = copy.deepcopy(history_boards[-1])
+        if action == self.size ** 2:
+            valid_moves = self._find_correct_moves(board, color, is_next=True)
             if not len(valid_moves):
-                self._game_over()
-                return None, self.winner * self.color
+                winner = self._get_winner(board)
+                return None, winner * color
             else:
-                return [self.board, 0 - self.color], 0
-        self._step()
-        return [self.board, 0 - self.color], 0
+                return [history_boards, 0 - color], 0
+        new_board = self._step(board, color, action)
+        history_boards.append(new_board)
+        return [history_boards, 0 - color], 0

-    def _game_over(self):
-        black_num, white_num = self._number_of_black_and_white()
-        self.black_win = black_num - white_num
-        if self.black_win > 0:
-            self.winner = 1
-        elif self.black_win < 0:
-            self.winner = -1
+    def _get_winner(self, board):
+        black_num, white_num = self._number_of_black_and_white(board)
+        black_win = black_num - white_num
+        if black_win > 0:
+            winner = 1
+        elif black_win < 0:
+            winner = -1
         else:
-            self.winner = 0
+            winner = 0
+        return winner

-    def _number_of_black_and_white(self):
+    def _number_of_black_and_white(self, board):
         black_num = 0
         white_num = 0
-        board_list = np.reshape(self.board, self.size ** 2)
+        board_list = np.reshape(board, self.size ** 2)
         for i in range(len(board_list)):
             if board_list[i] == 1:
                 black_num += 1
@@ -125,19 +119,18 @@ class Reversi:
                 white_num += 1
         return black_num, white_num

-    def _step(self):
-        if self.action < 0 or self.action > 63:
-            raise ValueError("Action not in the range of [0,63]!")
-        if self.action is None:
-            raise ValueError("Action is None!")
-        x, y = self._deflatten(self.action)
-        valid = self._flip(x -1, y - 1)
-        if not valid:
-            raise ValueError("Illegal action!")
+    def _step(self, board, color, action):
+        if action is None:
+            raise ValueError("Action is None!")
+        if action < 0 or action > self.size ** 2 - 1:
+            raise ValueError("Action not on the board!")
+        x, y = self._deflatten(action)
+        new_board = self._flip(board, x - 1, y - 1, color)
+        return new_board

-    def _flip(self, x, y):
+    def _flip(self, board, x, y, color):
         valid = 0
-        self.board[x, y] = self.color
+        board[x, y] = color
         for x_direction in [-1, 0, 1]:
             for y_direction in [-1, 0, 1]:
                 new_x = x
@@ -146,47 +139,44 @@ class Reversi:
                 while True:
                     new_x += x_direction
                     new_y += y_direction
-                    if self._one_direction_valid(new_x, new_y, 0 - self.color):
+                    if self._one_direction_valid(board, new_x, new_y, 0 - color):
                         flag = 1
                     else:
                         break
-                if self._one_direction_valid(new_x, new_y, self.color) and flag:
+                if self._one_direction_valid(board, new_x, new_y, color) and flag:
                     valid = 1
                     flip_x = x
                     flip_y = y
                     while True:
                         flip_x += x_direction
                         flip_y += y_direction
-                        if self._one_direction_valid(flip_x, flip_y, 0 - self.color):
-                            self.board[flip_x, flip_y] = self.color
+                        if self._one_direction_valid(board, flip_x, flip_y, 0 - color):
+                            board[flip_x, flip_y] = color
                         else:
                             break
         if valid:
-            return True
+            return board
         else:
-            return False
+            raise ValueError("Invalid action")

-    def executor_do_move(self, board, color, vertex):
-        self.board = np.reshape(board, (self.size, self.size))
-        self.color = color
-        self.action = self._flatten(vertex)
-        if self.action == 64:
-            valid_moves = self._find_correct_moves(is_next=True)
+    def executor_do_move(self, history, latest_boards, board, color, vertex):
+        board = np.reshape(board, (self.size, self.size))
+        action = self._flatten(vertex)
+        if action == self.size ** 2:
+            valid_moves = self._find_correct_moves(board, color, is_next=True)
             if not len(valid_moves):
                 return False
             else:
                 return True
         else:
-            self._step()
+            new_board = self._step(board, color, action)
+            history.append(new_board)
+            latest_boards.append(new_board)
             return True

     def executor_get_score(self, board):
-        self.board = board
-        self._game_over()
-        if self.black_win is not None:
-            return self.black_win
-        else:
-            raise ValueError("Game not finished!")
+        winner = self._get_winner(board)
+        return winner

 if __name__ == "__main__":
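Note: Reversi no longer keeps self.board/self.color/self.action between calls; every method now works on an explicit (board, color) pair, so MCTS rollouts cannot corrupt engine state. A hedged usage sketch against the patched class (assumes AlphaGo/ is importable; the expected move list follows from get_board()'s initial position):

import numpy as np
from reversi import Reversi

game = Reversi(size=8)
board = game.get_board()              # 8x8 int32 ndarray, four centre stones

# Black (+1) to move: the four classic opening squares, as flat indices.
moves = game._find_correct_moves(board, 1)
assert sorted(moves) == [19, 26, 37, 44]

# Advance one ply; simulate_step_forward deep-copies its input state,
# so `board` itself is left untouched.
(history, next_color), reward = game.simulate_step_forward([[board], 1], moves[0])
assert next_color == -1 and reward == 0 and len(history) == 2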
diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py
index e565337..493cf7d 100644
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@@ -110,15 +110,15 @@ class ActionNode(object):
         self.reward = 0

     def type_conversion_to_tuple(self):
-        if type(self.next_state) is np.ndarray:
+        if isinstance(self.next_state, np.ndarray):
             self.next_state = self.next_state.tolist()
-        if type(self.next_state) is list:
+        if isinstance(self.next_state, list):
             self.next_state = list2tuple(self.next_state)

     def type_conversion_to_origin(self):
-        if self.state_type is np.ndarray:
+        if issubclass(self.state_type, np.ndarray):
             self.next_state = np.array(self.next_state)
-        if self.state_type is list:
+        if issubclass(self.state_type, list):
             self.next_state = tuple2list(self.next_state)

     def selection(self, simulator):
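Note: the two conversion helpers exist because MCTS keys its search tree on states, and lists/ndarrays are not hashable; the corrected checks test the live instance on the way in and the remembered type object on the way out. A minimal round-trip sketch (list2tuple/tuple2list stand in for the helpers of the same name in mcts.py):

import numpy as np

def list2tuple(obj):
    # Recursively freeze nested lists into tuples so they become hashable.
    return tuple(list2tuple(x) for x in obj) if isinstance(obj, list) else obj

def tuple2list(obj):
    # Inverse: thaw nested tuples back into lists.
    return [tuple2list(x) for x in obj] if isinstance(obj, tuple) else obj

state = np.arange(4).reshape(2, 2)
state_type = type(state)                 # remembered type object

key = list2tuple(state.tolist())         # hashable key for the tree's dict
assert key == ((0, 1), (2, 3))

restored = tuple2list(key)
if issubclass(state_type, np.ndarray):   # type object, hence issubclass
    restored = np.array(restored)
assert (restored == state).all()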