Decouple go.Go from Game: the engine now takes size/komi at construction and
receives board/history state as explicit arguments instead of reaching into
self.game.

Review note: the go.py hunk at -131/+141 originally added calls to
self.deflatten()/self.flatten(), which Go does not define; they are corrected
here to the self._deflatten()/self._flatten() helpers this patch adds.
Leading indentation was reconstructed from a whitespace-collapsed copy of the
patch; verify context lines against the target tree before applying.

diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py
index c9f1a3c..8b54470 100644
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@@ -183,7 +183,7 @@ class GTPEngine():
             return 'unknown player', False
 
     def cmd_get_score(self, args, **kwargs):
-        return self._game.game_engine.executor_get_score(True), True
+        return self._game.game_engine.executor_get_score(self._game.board, True), True
 
     def cmd_show_board(self, args, **kwargs):
         return self._game.board, True
diff --git a/AlphaGo/game.py b/AlphaGo/game.py
index bf0d084..11ce52b 100644
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@@ -34,16 +34,7 @@ class Game:
         self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8)
         # self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v],
         # feed_dict={self.net.x: state, self.net.is_training: False})
-        self.game_engine = go.Go(game=self)
-
-    def _flatten(self, vertex):
-        x, y = vertex
-        return (x - 1) * self.size + (y - 1)
-
-    def _deflatten(self, idx):
-        x = idx // self.size + 1
-        y = idx % self.size + 1
-        return (x, y)
+        self.game_engine = go.Go(size=self.size, komi=self.komi)
 
     def clear(self):
         self.board = [utils.EMPTY] * (self.size ** 2)
@@ -67,14 +58,14 @@ class Game:
         if choice == self.size ** 2:
             move = utils.PASS
         else:
-            move = self._deflatten(choice)
+            move = self.game_engine._deflatten(choice)
         return move, prob
 
     def play_move(self, color, vertex):
         # this function can be called directly to play the opponent's move
         if vertex == utils.PASS:
             return True
-        res = self.game_engine.executor_do_move(color, vertex)
+        res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
         return res
 
     def think_play_move(self, color):
diff --git a/AlphaGo/go.py b/AlphaGo/go.py
index 4f1c759..9b7e21f 100644
--- a/AlphaGo/go.py
+++ b/AlphaGo/go.py
@@ -16,12 +16,22 @@ CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]]
 
 class Go:
     def __init__(self, **kwargs):
-        self.game = kwargs['game']
+        self.size = kwargs['size']
+        self.komi = kwargs['komi']
+
+    def _flatten(self, vertex):
+        x, y = vertex
+        return (x - 1) * self.size + (y - 1)
+
+    def _deflatten(self, idx):
+        x = idx // self.size + 1
+        y = idx % self.size + 1
+        return (x, y)
 
     def _in_board(self, vertex):
         x, y = vertex
-        if x < 1 or x > self.game.size: return False
-        if y < 1 or y > self.game.size: return False
+        if x < 1 or x > self.size: return False
+        if y < 1 or y > self.size: return False
         return True
 
     def _neighbor(self, vertex):
@@ -45,7 +55,7 @@ class Go:
         return corner
 
     def _find_group(self, current_board, vertex):
-        color = current_board[self.game._flatten(vertex)]
+        color = current_board[self._flatten(vertex)]
         # print ("color : ", color)
         chain = set()
         frontier = [vertex]
@@ -55,41 +65,41 @@ class Go:
             # print ("current : ", current)
             chain.add(current)
             for n in self._neighbor(current):
-                if current_board[self.game._flatten(n)] == color and not n in chain:
+                if current_board[self._flatten(n)] == color and not n in chain:
                     frontier.append(n)
-                if current_board[self.game._flatten(n)] == utils.EMPTY:
+                if current_board[self._flatten(n)] == utils.EMPTY:
                     has_liberty = True
         return has_liberty, chain
 
     def _is_suicide(self, current_board, color, vertex):
-        current_board[self.game._flatten(vertex)] = color # assume that we already take this move
+        current_board[self._flatten(vertex)] = color # assume that we already take this move
         suicide = False
 
         has_liberty, group = self._find_group(current_board, vertex)
         if not has_liberty:
             suicide = True # no liberty, suicide
             for n in self._neighbor(vertex):
-                if current_board[self.game._flatten(n)] == utils.another_color(color):
+                if current_board[self._flatten(n)] == utils.another_color(color):
                     opponent_liberty, group = self._find_group(current_board, n)
                     if not opponent_liberty:
                         suicide = False # this move is able to take opponent's stone, not suicide
 
-        current_board[self.game._flatten(vertex)] = utils.EMPTY # undo this move
+        current_board[self._flatten(vertex)] = utils.EMPTY # undo this move
         return suicide
 
     def _process_board(self, current_board, color, vertex):
         nei = self._neighbor(vertex)
         for n in nei:
-            if current_board[self.game._flatten(n)] == utils.another_color(color):
+            if current_board[self._flatten(n)] == utils.another_color(color):
                 has_liberty, group = self._find_group(current_board, n)
                 if not has_liberty:
                     for b in group:
-                        current_board[self.game._flatten(b)] = utils.EMPTY
+                        current_board[self._flatten(b)] = utils.EMPTY
 
     def _check_global_isomorphous(self, history_boards, current_board, color, vertex):
         repeat = False
         next_board = copy.copy(current_board)
-        next_board[self.game._flatten(vertex)] = color
+        next_board[self._flatten(vertex)] = color
         self._process_board(next_board, color, vertex)
         if next_board in history_boards:
             repeat = True
@@ -98,7 +108,7 @@ class Go:
     def _is_eye(self, current_board, color, vertex):
         nei = self._neighbor(vertex)
         cor = self._corner(vertex)
-        ncolor = {color == current_board[self.game._flatten(n)] for n in nei}
+        ncolor = {color == current_board[self._flatten(n)] for n in nei}
         if False in ncolor:
             # print "not all neighbors are in same color with us"
             return False
@@ -107,7 +117,7 @@ class Go:
             # print "all neighbors are in same group and same color with us"
             return True
         else:
-            opponent_number = [current_board[self.game._flatten(c)] for c in cor].count(-color)
+            opponent_number = [current_board[self._flatten(c)] for c in cor].count(-color)
             opponent_propotion = float(opponent_number) / float(len(cor))
             if opponent_propotion < 0.5:
                 # print "few opponents, real eye"
@@ -131,20 +141,20 @@ class Go:
         board = copy.deepcopy(current_board)
         empty_idx = [i for i, x in enumerate(board) if x == utils.EMPTY] # find all empty idx
         for idx in empty_idx:
-            neighbor_idx = self._neighbor(self.game.deflatten(idx))
+            neighbor_idx = self._neighbor(self._deflatten(idx))
             if len(neighbor_idx) > 1:
                 first_idx = neighbor_idx[0]
                 for other_idx in neighbor_idx[1:]:
-                    if self.game.board[self.game.flatten(other_idx)] != self.game.board[self.game.flatten(first_idx)]:
+                    if board[self._flatten(other_idx)] != board[self._flatten(first_idx)]:
                         return False
 
         return True
 
     def _action2vertex(self, action):
-        if action == self.game.size ** 2:
+        if action == self.size ** 2:
             vertex = (0, 0)
         else:
-            vertex = self.game._deflatten(action)
+            vertex = self._deflatten(action)
         return vertex
 
     def _is_valid(self, history_boards, current_board, color, vertex):
@@ -153,7 +163,7 @@ class Go:
             return False
 
         ### already have stone
-        if not current_board[self.game._flatten(vertex)] == utils.EMPTY:
+        if not current_board[self._flatten(vertex)] == utils.EMPTY:
             return False
 
         ### check if it is suicide
@@ -195,7 +205,7 @@ class Go:
         if vertex == utils.PASS:
             return board
         else:
-            id_ = self.game._flatten(vertex)
+            id_ = self._flatten(vertex)
             board[id_] = color
             return board
 
@@ -208,21 +218,21 @@ class Go:
         new_color = -color
         return [history_boards, new_color], 0
 
-    def executor_do_move(self, color, vertex):
-        if not self._is_valid(self.game.history, self.game.board, color, vertex):
+    def executor_do_move(self, history, latest_boards, current_board, color, vertex):
+        if not self._is_valid(history, current_board, color, vertex):
             return False
-        self.game.board[self.game._flatten(vertex)] = color
-        self._process_board(self.game.board, color, vertex)
-        self.game.history.append(copy.copy(self.game.board))
-        self.game.latest_boards.append(copy.copy(self.game.board))
+        current_board[self._flatten(vertex)] = color
+        self._process_board(current_board, color, vertex)
+        history.append(copy.copy(current_board))
+        latest_boards.append(copy.copy(current_board))
         return True
 
-    def _find_empty(self):
-        idx = [i for i,x in enumerate(self.game.board) if x == utils.EMPTY ][0]
-        return self.game._deflatten(idx)
+    def _find_empty(self, current_board):
+        idx = [i for i,x in enumerate(current_board) if x == utils.EMPTY ][0]
+        return self._deflatten(idx)
 
-    def _find_boarder(self, vertex):
-        _, group = self._find_group(self.game.board, vertex)
+    def _find_boarder(self, current_board, vertex):
+        _, group = self._find_group(current_board, vertex)
         border = []
         for b in group:
             for n in self._neighbor(b):
@@ -248,7 +258,7 @@ class Go:
             start_vertex_x += x_diff
             start_vertex_y += y_diff
 
-    def _predict_from_nearby(self, vertex, neighbor_step=3):
+    def _predict_from_nearby(self, current_board, vertex, neighbor_step=3):
         '''
         step: the nearby 3 steps is considered
         :vertex: position to be estimated
@@ -264,38 +274,37 @@ class Go:
             self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] - step, -1, 1, neighbor_step)
         color_estimate = 0
         for neighbor_vertex in neighbor_vertex_set:
-            color_estimate += self.game.board[self.game._flatten(neighbor_vertex)]
+            color_estimate += current_board[self._flatten(neighbor_vertex)]
         if color_estimate > 0:
             return utils.BLACK
         elif color_estimate < 0:
             return utils.WHITE
 
-    def executor_get_score(self, is_unknown_estimation=False):
+    def executor_get_score(self, current_board, is_unknown_estimation=False):
         '''
         is_unknown_estimation: whether use nearby stone to predict the unknown
         return score from BLACK perspective.
         '''
-        _board = copy.copy(self.game.board)
-        while utils.EMPTY in self.game.board:
-            vertex = self._find_empty()
-            boarder = self._find_boarder(vertex)
-            boarder_color = set(map(lambda v: self.game.board[self.game._flatten(v)], boarder))
+        _board = copy.deepcopy(current_board)
+        while utils.EMPTY in _board:
+            vertex = self._find_empty(_board)
+            boarder = self._find_boarder(_board, vertex)
+            boarder_color = set(map(lambda v: _board[self._flatten(v)], boarder))
             if boarder_color == {utils.BLACK}:
-                self.game.board[self.game._flatten(vertex)] = utils.BLACK
+                _board[self._flatten(vertex)] = utils.BLACK
             elif boarder_color == {utils.WHITE}:
-                self.game.board[self.game._flatten(vertex)] = utils.WHITE
+                _board[self._flatten(vertex)] = utils.WHITE
             elif is_unknown_estimation:
-                self.game.board[self.game._flatten(vertex)] = self._predict_from_nearby(vertex)
+                _board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex)
             else:
-                self.game.board[self.game._flatten(vertex)] =utils.UNKNOWN
+                _board[self._flatten(vertex)] =utils.UNKNOWN
         score = 0
-        for i in self.game.board:
+        for i in _board:
             if i == utils.BLACK:
                 score += 1
             elif i == utils.WHITE:
                 score -= 1
-        score -= self.game.komi
+        score -= self.komi
 
-        self.game.board = _board
         return score
 
diff --git a/AlphaGo/play.py b/AlphaGo/play.py
index 562dd14..e18555f 100644
--- a/AlphaGo/play.py
+++ b/AlphaGo/play.py
@@ -82,7 +82,7 @@ if __name__ == '__main__':
     evaluate_rounds = 1
     game_num = 0
     try:
-        while True:
+        while game_num < evaluate_rounds:
             num = 0
             pass_flag = [False, False]
             print("Start game {}".format(game_num))
@@ -132,6 +132,8 @@ if __name__ == '__main__':
                     picklestring = cPickle.dump(data, file)
             data.reset()
             game_num += 1
+        subprocess.call(["kill", "-9", str(agent_v0.pid)])
+        subprocess.call(["kill", "-9", str(agent_v1.pid)])
     except KeyboardInterrupt:
         subprocess.call(["kill", "-9", str(agent_v0.pid)])
         subprocess.call(["kill", "-9", str(agent_v1.pid)])
diff --git a/AlphaGo/self-play.py b/AlphaGo/self-play.py
index 63b7e97..4387b24 100644
--- a/AlphaGo/self-play.py
+++ b/AlphaGo/self-play.py
@@ -79,7 +79,7 @@ while True:
         prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
     print("Finished")
     print("\n")
-    score = game.game_engine.executor_get_score(True)
+    score = game.game_engine.executor_get_score(game.board, True)
     if score > 0:
         winner = utils.BLACK
     else: