diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py index 1ee8833..d11635a 100644 --- a/AlphaGo/engine.py +++ b/AlphaGo/engine.py @@ -183,7 +183,7 @@ class GTPEngine(): return 'unknown player', False def cmd_get_score(self, args, **kwargs): - return self._game.executor.get_score(), None + return self._game.executor.executor_get_score(), None def cmd_show_board(self, args, **kwargs): return self._game.board, True diff --git a/AlphaGo/game.py b/AlphaGo/game.py index d0cb91c..af4ef57 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -78,6 +78,7 @@ class Game: return state def think(self, latest_boards, color): + # TODO: is a shallow copy correct here, or should we switch to deepcopy? self.simulator.simulate_latest_boards = copy.copy(latest_boards) self.simulator.simulate_board = copy.copy(latest_boards[-1]) nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color) @@ -95,7 +96,7 @@ class Game: # this function can be called directly to play the opponent's move if vertex == utils.PASS: return True - res = self.executor.do_move(color, vertex) + res = self.executor.executor_do_move(color, vertex) return res def think_play_move(self, color): diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 37d8339..108c9bd 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -84,7 +84,7 @@ class Go: repeat = True return repeat - def is_valid(self, history_boards, current_board, color, vertex): + def _is_valid(self, history_boards, current_board, color, vertex): ### in board if not self._in_board(vertex): return False @@ -102,8 +102,8 @@ class Go: return True - def do_move(self, color, vertex): - if not self.is_valid(self.game.history, self.game.board, color, vertex): + def executor_do_move(self, color, vertex): + if not self._is_valid(self.game.history, self.game.board, color, vertex): return False self.game.board[self.game._flatten(vertex)] = color self._process_board(self.game.board, color, vertex) @@ -164,7 +164,7 @@ class Go: elif color_estimate < 0: return utils.WHITE - 
def get_score(self, is_unknown_estimation = False): + def executor_get_score(self, is_unknown_estimation = False): ''' is_unknown_estimation: whether use nearby stone to predict the unknown return score from BLACK perspective. diff --git a/AlphaGo/self-play.py b/AlphaGo/self-play.py index 98ccf84..296112b 100644 --- a/AlphaGo/self-play.py +++ b/AlphaGo/self-play.py @@ -79,7 +79,7 @@ while True: prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1)) print("Finished") print("\n") - score = game.executor.get_score(True) + score = game.executor.executor_get_score(True) if score > 0: winner = utils.BLACK else: diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py index 9ebd421..1e5fd02 100644 --- a/AlphaGo/strategy.py +++ b/AlphaGo/strategy.py @@ -117,14 +117,14 @@ class GoEnv: # print "many opponents, fake eye" return False - def knowledge_prunning(self, current_board, color, vertex): + def _knowledge_prunning(self, current_board, color, vertex): ### check if it is an eye of yourself ### assumptions : notice that this judgement requires that the state is an endgame if self._is_eye(current_board, color, vertex): return False return True - def sa2cv(self, state, action): + def _sa2cv(self, state, action): # State is the play board, the shape is [1, self.game.size, self.game.size, 17], action is an index. 
# We need to transfer the (state, action) pair into (color, vertex) pair to simulate the move if state[0, 0, 0, -1] == utils.BLACK: @@ -137,23 +137,13 @@ class GoEnv: vertex = self.game._deflatten(action) return color, vertex - def simulate_is_valid(self, history_boards, current_board, state, action): - # initialize simulate_latest_boards and simulate_board from state - self.simulate_latest_boards.clear() - for i in range(8): - self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) - self.simulate_board = copy.copy(self.simulate_latest_boards[-1]) - - color, vertex = self.sa2cv(state, action) - + def _is_valid(self, history_boards, current_board, color, vertex): ### in board if not self._in_board(vertex): return False ### already have stone if not current_board[self.game._flatten(vertex)] == utils.EMPTY: - # print(np.array(self.board).reshape(9, 9)) - # print(vertex) return False ### check if it is suicide @@ -164,12 +154,26 @@ class GoEnv: if self._check_global_isomorphous(history_boards, current_board, color, vertex): return False - if not self.knowledge_prunning(current_board, color, vertex): + return True + + def simulate_is_valid(self, history_boards, current_board, state, action): + # initialize simulate_latest_boards and simulate_board from state + self.simulate_latest_boards.clear() + for i in range(8): + self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) + self.simulate_board = copy.copy(self.simulate_latest_boards[-1]) + + color, vertex = self._sa2cv(state, action) + + if not self._is_valid(history_boards, current_board, color, vertex): + return False + + if not self._knowledge_prunning(current_board, color, vertex): return False return True - def simulate_do_move(self, color, vertex): + def _do_move(self, color, vertex): if vertex == utils.PASS: return True @@ -184,9 +188,9 @@ class GoEnv: # initialize the simulate_board from state self.simulate_board = 
(state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist() - color, vertex = self.sa2cv(state, action) + color, vertex = self._sa2cv(state, action) - self.simulate_do_move(color, vertex) + self._do_move(color, vertex) new_state = np.concatenate( [state[:, :, :, 1:8], (np.array(self.simulate_board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1), state[:, :, :, 9:16], (np.array(self.simulate_board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1),