From b2ef770415ade966dcc29073973bfea3a447481b Mon Sep 17 00:00:00 2001 From: Dong Yan Date: Sat, 23 Dec 2017 13:05:25 +0800 Subject: [PATCH] connect reversi with game --- AlphaGo/engine.py | 4 ++-- AlphaGo/game.py | 44 +++++++++++++++++++++++++------------- AlphaGo/go.py | 28 +++++++++++------------- AlphaGo/play.py | 1 - AlphaGo/reversi.py | 16 +++++++++----- AlphaGo/self-play.py | 2 +- tianshou/core/mcts/mcts.py | 2 +- 7 files changed, 57 insertions(+), 40 deletions(-) diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py index 8b54470..98e5e61 100644 --- a/AlphaGo/engine.py +++ b/AlphaGo/engine.py @@ -183,7 +183,7 @@ class GTPEngine(): return 'unknown player', False def cmd_get_score(self, args, **kwargs): - return self._game.game_engine.executor_get_score(self._game.board, True), True + return self._game.game_engine.executor_get_score(self._game.board), True def cmd_show_board(self, args, **kwargs): return self._game.board, True @@ -194,4 +194,4 @@ class GTPEngine(): if __name__ == "main": game = Game() - engine = GTPEngine(game_obj=Game) + engine = GTPEngine(game_obj=game) diff --git a/AlphaGo/game.py b/AlphaGo/game.py index df08c0a..ff1faf5 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -10,12 +10,14 @@ import copy import tensorflow as tf import numpy as np import sys, os -import go import model from collections import deque sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir)) from tianshou.core.mcts.mcts import MCTS +import go +import reversi + class Game: ''' Load the real game and trained weights. @@ -23,18 +25,26 @@ class Game: TODO : Maybe merge with the engine class in future, currently leave it untouched for interacting with Go UI. 
''' - def __init__(self, size=9, komi=3.75, checkpoint_path=None): - self.size = size - self.komi = komi - self.board = [utils.EMPTY] * (self.size ** 2) - self.history = [] - self.latest_boards = deque(maxlen=8) - for _ in range(8): - self.latest_boards.append(self.board) - self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8, checkpoint_path=checkpoint_path) - # self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], - # feed_dict={self.net.x: state, self.net.is_training: False}) - self.game_engine = go.Go(size=self.size, komi=self.komi) + def __init__(self, name="go", checkpoint_path=None): + self.name = name + if "go" == name: + self.size = 9 + self.komi = 3.75 + self.board = [utils.EMPTY] * (self.size ** 2) + self.history = [] + self.latest_boards = deque(maxlen=8) + for _ in range(8): + self.latest_boards.append(self.board) + + self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8) + self.game_engine = go.Go(size=self.size, komi=self.komi) + elif "reversi" == name: + self.size = 8 + self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=1) + self.game_engine = reversi.Reversi() + self.board = self.game_engine.get_board() + else: + print(name + " is an unknown game...") def clear(self): self.board = [utils.EMPTY] * (self.size ** 2) @@ -65,7 +75,11 @@ class Game: # this function can be called directly to play the opponent's move if vertex == utils.PASS: return True - res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex) + # TODO this implementation is not very elegant + if "go" == self.name: + res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex) + elif "reversi" == self.name: + res = self.game_engine.executor_do_move(self.board, color, vertex) return res def think_play_move(self, color): @@ -96,7 +110,7 @@ class Game: sys.stdout.flush() if __name__ == "__main__": - g = 
Game(checkpoint_path='./checkpoints/') + g = Game() g.show_board() g.think_play_move(1) #file = open("debug.txt", "a") diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 661d918..b819c08 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -157,7 +157,7 @@ class Go: vertex = self._deflatten(action) return vertex - def _is_valid(self, history_boards, current_board, color, vertex): + def _rule_check(self, history_boards, current_board, color, vertex): ### in board if not self._in_board(vertex): return False @@ -176,30 +176,30 @@ class Go: return True - def simulate_is_valid(self, state, action): + def _is_valid(self, state, action): history_boards, color = state vertex = self._action2vertex(action) current_board = history_boards[-1] - if not self._is_valid(history_boards, current_board, color, vertex): + if not self._rule_check(history_boards, current_board, color, vertex): return False if not self._knowledge_prunning(current_board, color, vertex): return False return True - def simulate_is_valid_list(self, state, action_set): + def simulate_get_mask(self, state, action_set): # find all the invalid actions - invalid_action_list = [] + invalid_action_mask = [] for action_candidate in action_set[:-1]: # go through all the actions excluding pass - if not self.simulate_is_valid(state, action_candidate): - invalid_action_list.append(action_candidate) - if len(invalid_action_list) < len(action_set) - 1: - invalid_action_list.append(action_set[-1]) + if not self._is_valid(state, action_candidate): + invalid_action_mask.append(action_candidate) + if len(invalid_action_mask) < len(action_set) - 1: + invalid_action_mask.append(action_set[-1]) # forbid pass, if we have other choices # TODO: In fact we should not do this. In some extreme cases, we should permit pass. 
- return invalid_action_list + return invalid_action_mask def _do_move(self, board, color, vertex): if vertex == utils.PASS: @@ -219,7 +219,7 @@ class Go: return [history_boards, new_color], 0 def executor_do_move(self, history, latest_boards, current_board, color, vertex): - if not self._is_valid(history, current_board, color, vertex): + if not self._rule_check(history, current_board, color, vertex): return False current_board[self._flatten(vertex)] = color self._process_board(current_board, color, vertex) @@ -280,7 +280,7 @@ class Go: elif color_estimate < 0: return utils.WHITE - def executor_get_score(self, current_board, is_unknown_estimation=False): + def executor_get_score(self, current_board): ''' is_unknown_estimation: whether use nearby stone to predict the unknown return score from BLACK perspective. @@ -294,10 +294,8 @@ class Go: _board[self._flatten(vertex)] = utils.BLACK elif boarder_color == {utils.WHITE}: _board[self._flatten(vertex)] = utils.WHITE - elif is_unknown_estimation: - _board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex) else: - _board[self._flatten(vertex)] =utils.UNKNOWN + _board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex) score = 0 for i in _board: if i == utils.BLACK: diff --git a/AlphaGo/play.py b/AlphaGo/play.py index 3681430..b601ada 100644 --- a/AlphaGo/play.py +++ b/AlphaGo/play.py @@ -7,7 +7,6 @@ import time import os import cPickle - class Data(object): def __init__(self): self.boards = [] diff --git a/AlphaGo/reversi.py b/AlphaGo/reversi.py index cba91d9..d67a882 100644 --- a/AlphaGo/reversi.py +++ b/AlphaGo/reversi.py @@ -25,7 +25,6 @@ def find_correct_moves(own, enemy): mobility |= search_offset_right(own, enemy, mask, 7) # Left bottom return mobility - def calc_flip(pos, own, enemy): """return flip stones of enemy by bitboard when I place stone at pos. 
@@ -133,7 +132,9 @@ class Reversi: self.board = self.bitboard2board() return self.board - def simulate_is_valid(self, board, color): + def simulate_get_mask(self, state, action_set): + history_boards, color = state + board = history_boards[-1] self.board = board self.color = color self.board2bitboard() @@ -142,13 +143,18 @@ class Reversi: valid_moves = bit_to_array(mobility, 64) valid_moves = np.argwhere(valid_moves) valid_moves = list(np.reshape(valid_moves, len(valid_moves))) - return valid_moves + # TODO it seems that the pass move is not considered + invalid_action_mask = [] + for action in action_set: + if action not in valid_moves: + invalid_action_mask.append(action) + return invalid_action_mask - def simulate_step_forward(self, state, vertex): + def simulate_step_forward(self, state, action): self.board = state[0] self.color = state[1] self.board2bitboard() - self.vertex2action(vertex) + self.action = action step_forward = self.step() if step_forward: new_board = self.bitboard2board() diff --git a/AlphaGo/self-play.py b/AlphaGo/self-play.py index 4387b24..dd03b13 100644 --- a/AlphaGo/self-play.py +++ b/AlphaGo/self-play.py @@ -79,7 +79,7 @@ while True: prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1)) print("Finished") print("\n") - score = game.game_engine.executor_get_score(game.board, True) + score = game.game_engine.executor_get_score(game.board) if score > 0: winner = utils.BLACK else: diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py index 8bb5f06..e8f3709 100644 --- a/tianshou/core/mcts/mcts.py +++ b/tianshou/core/mcts/mcts.py @@ -73,7 +73,7 @@ class UCTNode(MCTSNode): def valid_mask(self, simulator): # let all invalid actions be illeagel in mcts if self.mask is None: - self.mask = simulator.simulate_is_valid_list(self.state, range(self.action_num)) + self.mask = simulator.simulate_get_mask(self.state, range(self.action_num)) self.ucb[self.mask] = -float("Inf")