Fix komi handling (default komi 6.5 -> 3.75, pass an argument to the score query). Add a TODO about the forbid-pass rule:

2017-12-20 22:57:58 +08:00 · 2017-12-20 22:57:58 +08:00 · 00d2aa86bf
commit 00d2aa86bf
parent f0d59dab6c
3 changed files with 4 additions and 7 deletions
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@ -183,7 +183,7 @@ class GTPEngine():
            return 'unknown player', False

    def cmd_get_score(self, args, **kwargs):
-        return self._game.game_engine.executor_get_score(), None
+        return self._game.game_engine.executor_get_score(True), None

    def cmd_show_board(self, args, **kwargs):
        return self._game.board, True
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@ -23,7 +23,7 @@ class Game:
    TODO : Maybe merge with the engine class in future, 
    currently leave it untouched for interacting with Go UI.
    '''
-    def __init__(self, size=9, komi=6.5, checkpoint_path=None):
+    def __init__(self, size=9, komi=3.75, checkpoint_path=None):
        self.size = size
        self.komi = komi
        self.board = [utils.EMPTY] * (self.size ** 2)
@ -75,7 +75,7 @@ class Game:
        self.game_engine.simulate_board = copy.copy(latest_boards[-1])
        nn_input = self.generate_nn_input(self.game_engine.simulate_latest_boards, color)
        mcts = MCTS(self.game_engine, self.evaluator, [self.game_engine.simulate_latest_boards, color], self.size ** 2 + 1, inverse=True)
-        mcts.search(max_step=1)
+        mcts.search(max_step=5)
        temp = 1
        prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
        choice = np.random.choice(self.size ** 2 + 1, 1, p=prob).tolist()[0]
--- a/AlphaGo/go.py
+++ b/AlphaGo/go.py
@ -121,12 +121,8 @@ class Go:
        if self._is_eye(current_board, color, vertex):
            return False
            # forbid position on its own eye.
-        #if self._is_game_finish(current_board, color) and vertex == utils.PASS
-        #    return False
-        # forbid pass if the game is not finished.
        return True

-
    def _is_game_finished(self, current_board, color):
        '''
        for each empty position, if it has both BLACK and WHITE neighbors, the game is still not finished
@ -192,6 +188,7 @@ class Go:
        if len(invalid_action_list) < len(action_set) - 1:
            invalid_action_list.append(action_set[-1])
            # forbid pass, if we have other choices
+            # TODO: Forbidding pass here is not strictly correct; in some extreme cases pass should be permitted.
        return invalid_action_list

    def _do_move(self, board, color, vertex):