merge duplicate flatten and deflatten helpers, rename methods and constants for clarity

2017-12-19 16:51:50 +08:00 · 2017-12-19 16:51:50 +08:00 · fc8114fe35
commit fc8114fe35
parent 4a2d8f0003
5 changed files with 31 additions and 39 deletions
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@ -167,7 +167,7 @@ class GTPEngine():
        move = self._parse_move(args)
        if move:
            color, vertex = move
-            res = self._game.do_move(color, vertex)
+            res = self._game.play_move(color, vertex)
            if res:
                return None, True
            else:
@ -177,7 +177,7 @@ class GTPEngine():
    def cmd_genmove(self, args, **kwargs):
        color = self._parse_color(args)
        if color:
-            move = self._game.gen_move(color)
+            move = self._game.think_play_move(color)
            return self._vertex_point2string(move), True
        else:
            return 'unknown player', False
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@ -77,7 +77,7 @@ class Game:
            state[0, :, :, 16] = np.zeros([self.size, self.size])
        return state

-    def strategy_gen_move(self, latest_boards, color):
+    def think(self, latest_boards, color):
        self.simulator.simulate_latest_boards = copy.copy(latest_boards)
        self.simulator.simulate_board = copy.copy(latest_boards[-1])
        nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color)
@ -91,17 +91,18 @@ class Game:
            move = self._deflatten(choice)
        return move, prob

-    def do_move(self, color, vertex):
+    def play_move(self, color, vertex):
+        # this function can be called directly to play the opponent's move
        if vertex == utils.PASS:
            return True
        res = self.executor.do_move(color, vertex)
        return res

-    def gen_move(self, color):
-        # move = self.strategy.gen_move(color)
-        # return move
-        move, self.prob = self.strategy_gen_move(self.latest_boards, color)
-        self.do_move(color, move)
+    def think_play_move(self, color):
+        # self.prob is not returned to the caller, but it is stored because neural network training needs it
+        move, self.prob = self.think(self.latest_boards, color)
+        # play the move immediately
+        self.play_move(color, move)
        return move

    def status2symbol(self, s):
--- a/AlphaGo/strategy.py
+++ b/AlphaGo/strategy.py
@ -10,7 +10,7 @@ import tensorflow as tf
 from collections import deque
 from tianshou.core.mcts.mcts import MCTS

-DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]]
+NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]]
 CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]]

 class GoEnv:
@ -19,17 +19,8 @@ class GoEnv:
        self.simulate_board = [utils.EMPTY] * (self.game.size ** 2)
        self.simulate_latest_boards = deque(maxlen=8)

-    def simulate_flatten(self, vertex):
-        x, y = vertex
-        return (x - 1) * self.game.size + (y - 1)
-
-    def simulate_deflatten(self, idx):
-        x = idx // self.game.size + 1
-        y = idx % self.game.size + 1
-        return (x, y)
-
    def _find_group(self, start):
-        color = self.simulate_board[self.simulate_flatten(start)]
+        color = self.simulate_board[self.game._flatten(start)]
        # print ("color : ", color)
        chain = set()
        frontier = [start]
@ -40,32 +31,32 @@ class GoEnv:
            chain.add(current)
            for n in self._neighbor(current):
                # print n, self._flatten(n), self.board[self._flatten(n)],
-                if self.simulate_board[self.simulate_flatten(n)] == color and not n in chain:
+                if self.simulate_board[self.game._flatten(n)] == color and not n in chain:
                    frontier.append(n)
-                if self.simulate_board[self.simulate_flatten(n)] == utils.EMPTY:
+                if self.simulate_board[self.game._flatten(n)] == utils.EMPTY:
                    has_liberty = True
        return has_liberty, chain

    def _is_suicide(self, color, vertex):
-        self.simulate_board[self.simulate_flatten(vertex)] = color # assume that we already take this move
+        self.simulate_board[self.game._flatten(vertex)] = color # assume that we already take this move
        suicide = False

        has_liberty, group = self._find_group(vertex)
        if not has_liberty:
            suicide = True # no liberty, suicide
            for n in self._neighbor(vertex):
-                if self.simulate_board[self.simulate_flatten(n)] == utils.another_color(color):
+                if self.simulate_board[self.game._flatten(n)] == utils.another_color(color):
                    opponent_liberty, group = self._find_group(n)
                    if not opponent_liberty:
                        suicide = False # this move is able to take opponent's stone, not suicide

-        self.simulate_board[self.simulate_flatten(vertex)] = utils.EMPTY # undo this move
+        self.simulate_board[self.game._flatten(vertex)] = utils.EMPTY # undo this move
        return suicide

    def _check_global_isomorphous(self, color, vertex):
        ##backup
        _board = copy.copy(self.simulate_board)
-        self.simulate_board[self.simulate_flatten(vertex)] = color
+        self.simulate_board[self.game._flatten(vertex)] = color
        self._process_board(color, vertex)
        if self.simulate_board in self.game.history:
            res = True
@ -84,7 +75,7 @@ class GoEnv:
    def _neighbor(self, vertex):
        x, y = vertex
        nei = []
-        for d in DELTA:
+        for d in NEIGHBOR_OFFSET:
            _x = x + d[0]
            _y = y + d[1]
            if self._in_board((_x, _y)):
@ -104,16 +95,16 @@ class GoEnv:
    def _process_board(self, color, vertex):
        nei = self._neighbor(vertex)
        for n in nei:
-            if self.simulate_board[self.simulate_flatten(n)] == utils.another_color(color):
+            if self.simulate_board[self.game._flatten(n)] == utils.another_color(color):
                has_liberty, group = self._find_group(n)
                if not has_liberty:
                    for b in group:
-                        self.simulate_board[self.simulate_flatten(b)] = utils.EMPTY
+                        self.simulate_board[self.game._flatten(b)] = utils.EMPTY

    def _is_eye(self, color, vertex):
        nei = self._neighbor(vertex)
        cor = self._corner(vertex)
-        ncolor = {color == self.simulate_board[self.simulate_flatten(n)] for n in nei}
+        ncolor = {color == self.simulate_board[self.game._flatten(n)] for n in nei}
        if False in ncolor:
            # print "not all neighbors are in same color with us"
            return False
@ -122,7 +113,7 @@ class GoEnv:
            # print "all neighbors are in same group and same color with us"
            return True
        else:
-            opponent_number = [self.simulate_board[self.simulate_flatten(c)] for c in cor].count(-color)
+            opponent_number = [self.simulate_board[self.game._flatten(c)] for c in cor].count(-color)
            opponent_propotion = float(opponent_number) / float(len(cor))
            if opponent_propotion < 0.5:
                # print "few opponents, real eye"
@ -145,7 +136,7 @@ class GoEnv:
        if action == self.game.size ** 2:
            vertex = (0, 0)
        else:
-            vertex = self.simulate_deflatten(action)
+            vertex = self.game._deflatten(action)
        if state[0, 0, 0, -1] == utils.BLACK:
            color = utils.BLACK
        else:
@ -160,7 +151,7 @@ class GoEnv:
            return False

        ### already have stone
-        if not self.simulate_board[self.simulate_flatten(vertex)] == utils.EMPTY:
+        if not self.simulate_board[self.game._flatten(vertex)] == utils.EMPTY:
            # print(np.array(self.board).reshape(9, 9))
            # print(vertex)
            return False
@ -182,14 +173,14 @@ class GoEnv:
        if vertex == utils.PASS:
            return True

-        id_ = self.simulate_flatten(vertex)
+        id_ = self.game._flatten(vertex)
        if self.simulate_board[id_] == utils.EMPTY:
            self.simulate_board[id_] = color
            return True
        else:
            return False

-    def step_forward(self, state, action):
+    def simulate_step_forward(self, state, action):
        if state[0, 0, 0, -1] == 1:
            color = utils.BLACK
        else:
@ -197,7 +188,7 @@ class GoEnv:
        if action == self.game.size ** 2:
            vertex = utils.PASS
        else:
-            vertex = self.simulate_deflatten(action)
+            vertex = self.game._deflatten(action)
        # print(vertex)
        # print(self.board)
        self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist()
--- a/tianshou/core/mcts/evaluator.py
+++ b/tianshou/core/mcts/evaluator.py
@ -19,10 +19,10 @@ class rollout_policy(evaluator):
        # TODO: prior for rollout policy
        total_reward = 0.
        action = np.random.randint(0, self.action_num)
-        state, reward = self.env.step_forward(state, action)
+        state, reward = self.env.simulate_step_forward(state, action)
        total_reward += reward
        while state is not None:
            action = np.random.randint(0, self.action_num)
-            state, reward = self.env.step_forward(state, action)
+            state, reward = self.env.simulate_step_forward(state, action)
            total_reward += reward
        return np.ones([self.action_num])/self.action_num, total_reward
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@ -116,7 +116,7 @@ class ActionNode(object):
            self.next_state = tuple2list(self.next_state)

    def selection(self, simulator):
-        self.next_state, self.reward = simulator.step_forward(self.parent.state, self.action)
+        self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action)
        self.origin_state = self.next_state
        self.state_type = type(self.next_state)
        self.type_conversion_to_tuple()