final version before merge Go and GoEnv

commit 0456e0c15e
parent afc5dbac5a

@@ -183,7 +183,7 @@ class GTPEngine():
         return 'unknown player', False

     def cmd_get_score(self, args, **kwargs):
-        return self._game.executor.get_score(), None
+        return self._game.executor.executor_get_score(), None

     def cmd_show_board(self, args, **kwargs):
         return self._game.board, True

@@ -78,6 +78,7 @@ class Game:
         return state

     def think(self, latest_boards, color):
+        # TODO : using copy is right, or should we change to deepcopy?
         self.simulator.simulate_latest_boards = copy.copy(latest_boards)
         self.simulator.simulate_board = copy.copy(latest_boards[-1])
         nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color)

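The TODO added in think concerns copy.copy on latest_boards: a shallow copy re-uses the inner board lists, so later in-place edits to those boards remain visible through the simulator, whereas deepcopy would isolate them. A tiny standard-library illustration with stand-in data, not code from this commit:

    import copy

    latest_boards = [[0, 0], [0, 1]]      # stand-in for a list of flat boards

    shallow = copy.copy(latest_boards)    # new outer list, same inner lists
    deep = copy.deepcopy(latest_boards)   # fully independent copy

    latest_boards[-1][0] = 9              # mutate one board in place
    print(shallow[-1][0])                 # 9  -> the shallow copy sees the change
    print(deep[-1][0])                    # 0  -> the deep copy does not
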
@@ -95,7 +96,7 @@ class Game:
         # this function can be called directly to play the opponent's move
         if vertex == utils.PASS:
             return True
-        res = self.executor.do_move(color, vertex)
+        res = self.executor.executor_do_move(color, vertex)
         return res

     def think_play_move(self, color):

@@ -84,7 +84,7 @@ class Go:
                     repeat = True
         return repeat

-    def is_valid(self, history_boards, current_board, color, vertex):
+    def _is_valid(self, history_boards, current_board, color, vertex):
         ### in board
         if not self._in_board(vertex):
             return False

@@ -102,8 +102,8 @@ class Go:

         return True

-    def do_move(self, color, vertex):
-        if not self.is_valid(self.game.history, self.game.board, color, vertex):
+    def executor_do_move(self, color, vertex):
+        if not self._is_valid(self.game.history, self.game.board, color, vertex):
             return False
         self.game.board[self.game._flatten(vertex)] = color
         self._process_board(self.game.board, color, vertex)

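executor_do_move keeps the original flow: check _is_valid, then write the stone into the flat board list and let _process_board resolve captures. A stand-alone sketch of the flatten-and-assign step; the 1-based (x, y) vertex convention and the flatten formula are assumptions mirroring _flatten/_deflatten, not code from this commit:

    BLACK = 1                                 # assumed stone encoding
    size = 9
    board = [0] * (size * size)               # flat board, one entry per point

    def flatten_hypothetical(vertex, size):
        # Hypothetical counterpart of Go._flatten: 1-based (x, y) -> flat index.
        x, y = vertex
        return (x - 1) * size + (y - 1)

    vertex = (3, 3)
    board[flatten_hypothetical(vertex, size)] = BLACK   # mirrors board[self.game._flatten(vertex)] = color
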
@@ -164,7 +164,7 @@ class Go:
         elif color_estimate < 0:
             return utils.WHITE

-    def get_score(self, is_unknown_estimation = False):
+    def executor_get_score(self, is_unknown_estimation = False):
         '''
         is_unknown_estimation: whether use nearby stone to predict the unknown
         return score from BLACK perspective.

@@ -79,7 +79,7 @@ while True:
    prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
    print("Finished")
    print("\n")
-   score = game.executor.get_score(True)
+   score = game.executor.executor_get_score(True)
    if score > 0:
        winner = utils.BLACK
    else:

@@ -117,14 +117,14 @@ class GoEnv:
             # print "many opponents, fake eye"
             return False

-    def knowledge_prunning(self, current_board, color, vertex):
+    def _knowledge_prunning(self, current_board, color, vertex):
         ### check if it is an eye of yourself
         ### assumptions : notice that this judgement requires that the state is an endgame
         if self._is_eye(current_board, color, vertex):
             return False
         return True

-    def sa2cv(self, state, action):
+    def _sa2cv(self, state, action):
         # State is the play board, the shape is [1, self.game.size, self.game.size, 17], action is an index.
         # We need to transfer the (state, action) pair into (color, vertex) pair to simulate the move
         if state[0, 0, 0, -1] == utils.BLACK:

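_sa2cv reads the colour to move from the last feature plane and maps the flat action index back to a vertex with _deflatten. A rough sketch of that mapping; the +1/-1 colour encoding and the deflatten convention are assumptions, not taken from this diff:

    import numpy as np

    BLACK, WHITE = 1, -1                       # assumed values of utils.BLACK / utils.WHITE
    size = 9
    state = np.zeros((1, size, size, 17))
    state[:, :, :, -1] = BLACK                 # plane 16: colour to move

    def deflatten_hypothetical(action, size):
        # Hypothetical inverse of _flatten: flat index -> 1-based (x, y) vertex.
        return action // size + 1, action % size + 1

    color = BLACK if state[0, 0, 0, -1] == BLACK else WHITE
    vertex = deflatten_hypothetical(40, size)  # action 40 -> (5, 5) on a 9x9 board
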
@@ -137,23 +137,13 @@ class GoEnv:
         vertex = self.game._deflatten(action)
         return color, vertex

-    def simulate_is_valid(self, history_boards, current_board, state, action):
-        # initialize simulate_latest_boards and simulate_board from state
-        self.simulate_latest_boards.clear()
-        for i in range(8):
-            self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist())
-        self.simulate_board = copy.copy(self.simulate_latest_boards[-1])
-
-        color, vertex = self.sa2cv(state, action)
-
+    def _is_valid(self, history_boards, current_board, color, vertex):
         ### in board
         if not self._in_board(vertex):
             return False

         ### already have stone
         if not current_board[self.game._flatten(vertex)] == utils.EMPTY:
-            # print(np.array(self.board).reshape(9, 9))
-            # print(vertex)
             return False

         ### check if it is suicide

@@ -164,12 +154,26 @@ class GoEnv:
         if self._check_global_isomorphous(history_boards, current_board, color, vertex):
             return False

-        if not self.knowledge_prunning(current_board, color, vertex):
+        return True
+
+    def simulate_is_valid(self, history_boards, current_board, state, action):
+        # initialize simulate_latest_boards and simulate_board from state
+        self.simulate_latest_boards.clear()
+        for i in range(8):
+            self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist())
+        self.simulate_board = copy.copy(self.simulate_latest_boards[-1])
+
+        color, vertex = self._sa2cv(state, action)
+
+        if not self._is_valid(history_boards, current_board, color, vertex):
+            return False
+
+        if not self._knowledge_prunning(current_board, color, vertex):
             return False

         return True

-    def simulate_do_move(self, color, vertex):
+    def _do_move(self, color, vertex):
         if vertex == utils.PASS:
             return True

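After this split, simulate_is_valid only has to rebuild the recent board history from the 17-plane state tensor before delegating to _is_valid and _knowledge_prunning. A self-contained sketch of that decoding step with a dummy tensor; the board size and the zero-filled state are illustrative:

    import copy
    import numpy as np

    size = 9
    state = np.zeros((1, size, size, 17))      # dummy feature planes

    # Same reconstruction as above: the i-th recent board is plane i minus plane i + 8.
    simulate_latest_boards = [
        (state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()
        for i in range(8)
    ]
    simulate_board = copy.copy(simulate_latest_boards[-1])   # newest position as a flat list
    print(len(simulate_board))                               # 81 points on a 9x9 board
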
@@ -184,9 +188,9 @@ class GoEnv:
         # initialize the simulate_board from state
         self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist()

-        color, vertex = self.sa2cv(state, action)
+        color, vertex = self._sa2cv(state, action)

-        self.simulate_do_move(color, vertex)
+        self._do_move(color, vertex)
         new_state = np.concatenate(
             [state[:, :, :, 1:8], (np.array(self.simulate_board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1),
              state[:, :, :, 9:16], (np.array(self.simulate_board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1),

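This step then slides the history window: for each colour it drops the oldest plane and appends the just-simulated board as the newest one. A shape-level sketch with dummy data; the colour-to-move plane that completes the 17-plane tensor is not shown in this hunk, so it is left out here too:

    import numpy as np

    size = 9
    state = np.zeros((1, size, size, 17))      # dummy current state
    simulate_board = [0] * (size * size)       # dummy flat board after the simulated move
    BLACK, WHITE = 1, -1                       # assumed stone encodings

    new_planes = np.concatenate(
        [state[:, :, :, 1:8],
         (np.array(simulate_board) == BLACK).reshape(1, size, size, 1),
         state[:, :, :, 9:16],
         (np.array(simulate_board) == WHITE).reshape(1, size, size, 1)],
        axis=-1)
    print(new_planes.shape)                    # (1, 9, 9, 16): 16 history planes, colour plane still missing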