From 72a9f4823cc02ee82b00b06dc45cdf619dc566d5 Mon Sep 17 00:00:00 2001 From: Dong Yan Date: Tue, 19 Dec 2017 11:16:17 +0800 Subject: [PATCH] rename variable for clarity --- AlphaGo/game.py | 16 ++++----- AlphaGo/go.py | 83 +++++++++++++++++++++++---------------------- AlphaGo/strategy.py | 60 ++++++++++++++++---------------- 3 files changed, 80 insertions(+), 79 deletions(-) diff --git a/AlphaGo/game.py b/AlphaGo/game.py index 02ccb27..3b62435 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -29,7 +29,7 @@ class Game: def __init__(self, size=9, komi=6.5, checkpoint_path=None): self.size = size self.komi = komi - self.board = [utils.EMPTY] * (self.size * self.size) + self.board = [utils.EMPTY] * (self.size ** 2) self.history = [] self.latest_boards = deque(maxlen=8) for _ in range(8): @@ -54,7 +54,7 @@ class Game: return (x,y) def clear(self): - self.board = [utils.EMPTY] * (self.size * self.size) + self.board = [utils.EMPTY] * (self.size ** 2) self.history = [] for _ in range(8): self.latest_boards.append(self.board) @@ -66,11 +66,11 @@ class Game: def set_komi(self, k): self.komi = k - def generate_nn_input(self, history, color): + def generate_nn_input(self, latest_boards, color): state = np.zeros([1, self.size, self.size, 17]) for i in range(8): - state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.size ** 2)).reshape(self.size, self.size) - state[0, :, :, i + 8] = np.array(np.array(history[i]) == -np.ones(self.size ** 2)).reshape(self.size, self.size) + state[0, :, :, i] = np.array(np.array(latest_boards[i]) == np.ones(self.size ** 2)).reshape(self.size, self.size) + state[0, :, :, i + 8] = np.array(np.array(latest_boards[i]) == -np.ones(self.size ** 2)).reshape(self.size, self.size) if color == utils.BLACK: state[0, :, :, 16] = np.ones([self.size, self.size]) if color == utils.WHITE: @@ -78,9 +78,9 @@ class Game: return state def strategy_gen_move(self, latest_boards, color): - self.simulator.latest_boards = copy.copy(latest_boards) - self.simulator.board = copy.copy(latest_boards[-1]) - nn_input = self.generate_nn_input(self.simulator.latest_boards, color) + self.simulator.simulate_latest_boards = copy.copy(latest_boards) + self.simulator.simulate_board = copy.copy(latest_boards[-1]) + nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color) mcts = MCTS(self.simulator, self.evaluator, nn_input, self.size ** 2 + 1, inverse=True, max_step=1) temp = 1 prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 752973e..7b1d3e7 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -28,7 +28,7 @@ class Go: def _find_block(self, vertex): block = [] - status = [False] * (self.game.size * self.game.size) + status = [False] * (self.game.size ** 2) color = self.game.board[self.game._flatten(vertex)] self._bfs(vertex, color, block, status) @@ -40,7 +40,7 @@ class Go: def _find_boarder(self, vertex): block = [] - status = [False] * (self.game.size * self.game.size) + status = [False] * (self.game.size ** 2) self._bfs(vertex, utils.EMPTY, block, status) border = [] for b in block: @@ -141,6 +141,46 @@ class Go: idx = [i for i,x in enumerate(self.game.board) if x == utils.EMPTY ][0] return self.game._deflatten(idx) + def _add_nearby_stones(self, neighbor_vertex_set, start_vertex_x, start_vertex_y, x_diff, y_diff, num_step): + ''' + add the nearby stones around the input vertex + :param neighbor_vertex_set: input list + :param start_vertex_x: x axis of the input vertex + :param start_vertex_y: y axis of the input vertex + :param x_diff: add x axis + :param y_diff: add y axis + :param num_step: number of steps to be added + :return: + ''' + for step in xrange(num_step): + new_neighbor_vertex = (start_vertex_x, start_vertex_y) + if self._in_board(new_neighbor_vertex): + neighbor_vertex_set.append((start_vertex_x, start_vertex_y)) + start_vertex_x += x_diff + start_vertex_y += y_diff + + def _predict_from_nearby(self, vertex, neighbor_step = 3): + ''' + step: the nearby 3 steps is considered + :vertex: position to be estimated + :neighbor_step: how many steps nearby + :return: the nearby positions of the input position + currently the nearby 3*3 grid is returned, altogether 4*8 points involved + ''' + for step in range(1, neighbor_step + 1): # check the stones within the steps in range + neighbor_vertex_set = [] + self._add_nearby_stones(neighbor_vertex_set, vertex[0] - step, vertex[1], 1, 1, neighbor_step) + self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] + step, 1, -1, neighbor_step) + self._add_nearby_stones(neighbor_vertex_set, vertex[0] + step, vertex[1], -1, -1, neighbor_step) + self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] - step, -1, 1, neighbor_step) + color_estimate = 0 + for neighbor_vertex in neighbor_vertex_set: + color_estimate += self.game.board[self.game._flatten(neighbor_vertex)] + if color_estimate > 0: + return utils.BLACK + elif color_estimate < 0: + return utils.WHITE + def get_score(self, is_unknown_estimation = False): ''' is_unknown_estimation: whether use nearby stone to predict the unknown @@ -170,42 +210,3 @@ class Go: self.game.board = _board return score - def _predict_from_nearby(self, vertex, neighbor_step = 3): - ''' - step: the nearby 3 steps is considered - :vertex: position to be estimated - :neighbor_step: how many steps nearby - :return: the nearby positions of the input position - currently the nearby 3*3 grid is returned, altogether 4*8 points involved - ''' - for step in range(1, neighbor_step + 1): # check the stones within the steps in range - neighbor_vertex_set = [] - self._add_nearby_stones(neighbor_vertex_set, vertex[0] - step, vertex[1], 1, 1, neighbor_step) - self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] + step, 1, -1, neighbor_step) - self._add_nearby_stones(neighbor_vertex_set, vertex[0] + step, vertex[1], -1, -1, neighbor_step) - self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] - step, -1, 1, neighbor_step) - color_estimate = 0 - for neighbor_vertex in neighbor_vertex_set: - color_estimate += self.game.board[self.game._flatten(neighbor_vertex)] - if color_estimate > 0: - return utils.BLACK - elif color_estimate < 0: - return utils.WHITE - - def _add_nearby_stones(self, neighbor_vertex_set, start_vertex_x, start_vertex_y, x_diff, y_diff, num_step): - ''' - add the nearby stones around the input vertex - :param neighbor_vertex_set: input list - :param start_vertex_x: x axis of the input vertex - :param start_vertex_y: y axis of the input vertex - :param x_diff: add x axis - :param y_diff: add y axis - :param num_step: number of steps to be added - :return: - ''' - for step in xrange(num_step): - new_neighbor_vertex = (start_vertex_x, start_vertex_y) - if self._in_board(new_neighbor_vertex): - neighbor_vertex_set.append((start_vertex_x, start_vertex_y)) - start_vertex_x += x_diff - start_vertex_y += y_diff diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py index e00e69d..fe6bcbf 100644 --- a/AlphaGo/strategy.py +++ b/AlphaGo/strategy.py @@ -16,15 +16,15 @@ CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]] class GoEnv: def __init__(self, **kwargs): self.game = kwargs['game'] - self.board = [utils.EMPTY] * (self.game.size * self.game.size) - self.latest_boards = deque(maxlen=8) + self.simulate_board = [utils.EMPTY] * (self.game.size ** 2) + self.simulate_latest_boards = deque(maxlen=8) - def _flatten(self, vertex): + def simulate_flatten(self, vertex): x, y = vertex return (x - 1) * self.game.size + (y - 1) def _find_group(self, start): - color = self.board[self._flatten(start)] + color = self.simulate_board[self.simulate_flatten(start)] # print ("color : ", color) chain = set() frontier = [start] @@ -35,45 +35,45 @@ class GoEnv: chain.add(current) for n in self._neighbor(current): # print n, self._flatten(n), self.board[self._flatten(n)], - if self.board[self._flatten(n)] == color and not n in chain: + if self.simulate_board[self.simulate_flatten(n)] == color and not n in chain: frontier.append(n) - if self.board[self._flatten(n)] == utils.EMPTY: + if self.simulate_board[self.simulate_flatten(n)] == utils.EMPTY: has_liberty = True return has_liberty, chain def _is_suicide(self, color, vertex): ### assume that we already take this move - self.board[self._flatten(vertex)] = color + self.simulate_board[self.simulate_flatten(vertex)] = color has_liberty, group = self._find_group(vertex) if has_liberty: ### this group still has liberty after this move, not suicide - self.board[self._flatten(vertex)] = utils.EMPTY + self.simulate_board[self.simulate_flatten(vertex)] = utils.EMPTY return False else: ### liberty is zero for n in self._neighbor(vertex): - if self.board[self._flatten(n)] == utils.another_color(color): + if self.simulate_board[self.simulate_flatten(n)] == utils.another_color(color): opponent_liberty, group = self._find_group(n) # this move is able to take opponent's stone, not suicide if not opponent_liberty: - self.board[self._flatten(vertex)] = utils.EMPTY + self.simulate_board[self.simulate_flatten(vertex)] = utils.EMPTY return False # not a take, suicide - self.board[self._flatten(vertex)] = utils.EMPTY + self.simulate_board[self.simulate_flatten(vertex)] = utils.EMPTY return True def _check_global_isomorphous(self, color, vertex): ##backup - _board = copy.copy(self.board) - self.board[self._flatten(vertex)] = color + _board = copy.copy(self.simulate_board) + self.simulate_board[self.simulate_flatten(vertex)] = color self._process_board(color, vertex) - if self.board in self.latest_boards: + if self.simulate_board in self.simulate_latest_boards: res = True else: res = False - self.board = _board + self.simulate_board = _board return res def _in_board(self, vertex): @@ -105,16 +105,16 @@ class GoEnv: def _process_board(self, color, vertex): nei = self._neighbor(vertex) for n in nei: - if self.board[self._flatten(n)] == utils.another_color(color): + if self.simulate_board[self.simulate_flatten(n)] == utils.another_color(color): has_liberty, group = self._find_group(n) if not has_liberty: for b in group: - self.board[self._flatten(b)] = utils.EMPTY + self.simulate_board[self.simulate_flatten(b)] = utils.EMPTY def _is_eye(self, color, vertex): nei = self._neighbor(vertex) cor = self._corner(vertex) - ncolor = {color == self.board[self._flatten(n)] for n in nei} + ncolor = {color == self.simulate_board[self.simulate_flatten(n)] for n in nei} if False in ncolor: # print "not all neighbors are in same color with us" return False @@ -123,7 +123,7 @@ class GoEnv: # print "all neighbors are in same group and same color with us" return True else: - opponent_number = [self.board[self._flatten(c)] for c in cor].count(-color) + opponent_number = [self.simulate_board[self.simulate_flatten(c)] for c in cor].count(-color) opponent_propotion = float(opponent_number) / float(len(cor)) if opponent_propotion < 0.5: # print "few opponents, real eye" @@ -141,7 +141,7 @@ class GoEnv: def simulate_is_valid(self, state, action): # state is the play board, the shape is [1, 9, 9, 17] - if action == self.game.size * self.game.size: + if action == self.game.size ** 2: vertex = (0, 0) else: vertex = (action / self.game.size + 1, action % self.game.size + 1) @@ -149,17 +149,17 @@ class GoEnv: color = utils.BLACK else: color = utils.WHITE - self.latest_boards.clear() + self.simulate_latest_boards.clear() for i in range(8): - self.latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) - self.board = copy.copy(self.latest_boards[-1]) + self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) + self.simulate_board = copy.copy(self.simulate_latest_boards[-1]) ### in board if not self._in_board(vertex): return False ### already have stone - if not self.board[self._flatten(vertex)] == utils.EMPTY: + if not self.simulate_board[self.simulate_flatten(vertex)] == utils.EMPTY: # print(np.array(self.board).reshape(9, 9)) # print(vertex) return False @@ -181,9 +181,9 @@ class GoEnv: if vertex == utils.PASS: return True - id_ = self._flatten(vertex) - if self.board[id_] == utils.EMPTY: - self.board[id_] = color + id_ = self.simulate_flatten(vertex) + if self.simulate_board[id_] == utils.EMPTY: + self.simulate_board[id_] = color return True else: return False @@ -199,11 +199,11 @@ class GoEnv: vertex = (action % self.game.size + 1, action / self.game.size + 1) # print(vertex) # print(self.board) - self.board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist() + self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist() self.do_move(color, vertex) new_state = np.concatenate( - [state[:, :, :, 1:8], (np.array(self.board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1), - state[:, :, :, 9:16], (np.array(self.board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1), + [state[:, :, :, 1:8], (np.array(self.simulate_board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1), + state[:, :, :, 9:16], (np.array(self.simulate_board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1), np.array(1 - state[:, :, :, -1]).reshape(1, self.game.size, self.game.size, 1)], axis=3) return new_state, 0