From db40994e1145aed511b2b80e503334178ade3c14 Mon Sep 17 00:00:00 2001 From: Dong Yan Date: Wed, 20 Dec 2017 01:14:05 +0800 Subject: [PATCH] merge Go and GoEnv finallygit status! --- AlphaGo/engine.py | 2 +- AlphaGo/game.py | 23 ++--- AlphaGo/go.py | 99 ++++++++++++++++++++- AlphaGo/self-play.py | 2 +- AlphaGo/strategy.py | 199 ------------------------------------------- 5 files changed, 108 insertions(+), 217 deletions(-) delete mode 100644 AlphaGo/strategy.py diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py index d11635a..9948176 100644 --- a/AlphaGo/engine.py +++ b/AlphaGo/engine.py @@ -183,7 +183,7 @@ class GTPEngine(): return 'unknown player', False def cmd_get_score(self, args, **kwargs): - return self._game.executor.executor_get_score(), None + return self._game.game_engine.executor_get_score(), None def cmd_show_board(self, args, **kwargs): return self._game.board, True diff --git a/AlphaGo/game.py b/AlphaGo/game.py index af4ef57..aee8d3a 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -9,16 +9,13 @@ import utils import copy import tensorflow as tf import numpy as np -import sys +import sys, os import go import network_small -import strategy from collections import deque +sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir)) from tianshou.core.mcts.mcts import MCTS -import Network -#from strategy import strategy - class Game: ''' Load the real game and trained weights. @@ -34,15 +31,11 @@ class Game: self.latest_boards = deque(maxlen=8) for _ in range(8): self.latest_boards.append(self.board) - - self.executor = go.Go(game=self) - #self.strategy = strategy(checkpoint_path) - - self.simulator = strategy.GoEnv(game=self) self.net = network_small.Network() self.sess = self.net.forward(checkpoint_path) self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], feed_dict={self.net.x: state, self.net.is_training: False}) + self.game_engine = go.Go(game=self) def _flatten(self, vertex): x, y = vertex @@ -79,10 +72,10 @@ class Game: def think(self, latest_boards, color): # TODO : using copy is right, or should we change to deepcopy? - self.simulator.simulate_latest_boards = copy.copy(latest_boards) - self.simulator.simulate_board = copy.copy(latest_boards[-1]) - nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color) - mcts = MCTS(self.simulator, self.evaluator, nn_input, self.size ** 2 + 1, inverse=True, max_step=1) + self.game_engine.simulate_latest_boards = copy.copy(latest_boards) + self.game_engine.simulate_board = copy.copy(latest_boards[-1]) + nn_input = self.generate_nn_input(self.game_engine.simulate_latest_boards, color) + mcts = MCTS(self.game_engine, self.evaluator, nn_input, self.size ** 2 + 1, inverse=True, max_step=1) temp = 1 prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) choice = np.random.choice(self.size ** 2 + 1, 1, p=prob).tolist()[0] @@ -96,7 +89,7 @@ class Game: # this function can be called directly to play the opponent's move if vertex == utils.PASS: return True - res = self.executor.executor_do_move(color, vertex) + res = self.game_engine.executor_do_move(color, vertex) return res def think_play_move(self, color): diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 108c9bd..10ce7e1 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -1,7 +1,7 @@ from __future__ import print_function import utils import copy -import sys +import numpy as np from collections import deque ''' @@ -12,10 +12,13 @@ Settings of the Go game. ''' NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]] +CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]] class Go: def __init__(self, **kwargs): self.game = kwargs['game'] + self.simulate_board = [utils.EMPTY] * (self.game.size ** 2) + self.simulate_latest_boards = deque(maxlen=8) def _in_board(self, vertex): x, y = vertex @@ -33,6 +36,16 @@ class Go: nei.append((_x, _y)) return nei + def _corner(self, vertex): + x, y = vertex + corner = [] + for d in CORNER_OFFSET: + _x = x + d[0] + _y = y + d[1] + if self._in_board((_x, _y)): + corner.append((_x, _y)) + return corner + def _find_group(self, current_board, vertex): color = current_board[self.game._flatten(vertex)] # print ("color : ", color) @@ -84,6 +97,47 @@ class Go: repeat = True return repeat + def _is_eye(self, current_board, color, vertex): + nei = self._neighbor(vertex) + cor = self._corner(vertex) + ncolor = {color == current_board[self.game._flatten(n)] for n in nei} + if False in ncolor: + # print "not all neighbors are in same color with us" + return False + _, group = self._find_group(current_board, nei[0]) + if set(nei) < group: + # print "all neighbors are in same group and same color with us" + return True + else: + opponent_number = [current_board[self.game._flatten(c)] for c in cor].count(-color) + opponent_propotion = float(opponent_number) / float(len(cor)) + if opponent_propotion < 0.5: + # print "few opponents, real eye" + return True + else: + # print "many opponents, fake eye" + return False + + def _knowledge_prunning(self, current_board, color, vertex): + ### check if it is an eye of yourself + ### assumptions : notice that this judgement requires that the state is an endgame + if self._is_eye(current_board, color, vertex): + return False + return True + + def _sa2cv(self, state, action): + # State is the play board, the shape is [1, self.game.size, self.game.size, 17], action is an index. + # We need to transfer the (state, action) pair into (color, vertex) pair to simulate the move + if state[0, 0, 0, -1] == utils.BLACK: + color = utils.BLACK + else: + color = utils.WHITE + if action == self.game.size ** 2: + vertex = (0, 0) + else: + vertex = self.game._deflatten(action) + return color, vertex + def _is_valid(self, history_boards, current_board, color, vertex): ### in board if not self._in_board(vertex): @@ -97,11 +151,54 @@ class Go: if self._is_suicide(current_board, color, vertex): return False + ### forbid global isomorphous if self._check_global_isomorphous(history_boards, current_board, color, vertex): return False return True + def simulate_is_valid(self, history_boards, current_board, state, action): + # initialize simulate_latest_boards and simulate_board from state + self.simulate_latest_boards.clear() + for i in range(8): + self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) + self.simulate_board = copy.copy(self.simulate_latest_boards[-1]) + + color, vertex = self._sa2cv(state, action) + + if not self._is_valid(history_boards, current_board, color, vertex): + return False + + if not self._knowledge_prunning(current_board, color, vertex): + return False + + return True + + def _do_move(self, color, vertex): + if vertex == utils.PASS: + return True + + id_ = self.game._flatten(vertex) + if self.simulate_board[id_] == utils.EMPTY: + self.simulate_board[id_] = color + return True + else: + return False + + def simulate_step_forward(self, state, action): + # initialize the simulate_board from state + self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist() + + color, vertex = self._sa2cv(state, action) + + self._do_move(color, vertex) + new_state = np.concatenate( + [state[:, :, :, 1:8], (np.array(self.simulate_board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1), + state[:, :, :, 9:16], (np.array(self.simulate_board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1), + np.array(1 - state[:, :, :, -1]).reshape(1, self.game.size, self.game.size, 1)], + axis=3) + return new_state, 0 + def executor_do_move(self, color, vertex): if not self._is_valid(self.game.history, self.game.board, color, vertex): return False diff --git a/AlphaGo/self-play.py b/AlphaGo/self-play.py index 296112b..63b7e97 100644 --- a/AlphaGo/self-play.py +++ b/AlphaGo/self-play.py @@ -79,7 +79,7 @@ while True: prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1)) print("Finished") print("\n") - score = game.executor.executor_get_score(True) + score = game.game_engine.executor_get_score(True) if score > 0: winner = utils.BLACK else: diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py deleted file mode 100644 index 1e5fd02..0000000 --- a/AlphaGo/strategy.py +++ /dev/null @@ -1,199 +0,0 @@ -import os, sys - -sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir)) -import numpy as np -import utils -import time -import copy -import network_small -import tensorflow as tf -from collections import deque -from tianshou.core.mcts.mcts import MCTS - -NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]] -CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]] - -class GoEnv: - def __init__(self, **kwargs): - self.game = kwargs['game'] - self.simulate_board = [utils.EMPTY] * (self.game.size ** 2) - self.simulate_latest_boards = deque(maxlen=8) - - def _in_board(self, vertex): - x, y = vertex - if x < 1 or x > self.game.size: return False - if y < 1 or y > self.game.size: return False - return True - - def _neighbor(self, vertex): - x, y = vertex - nei = [] - for d in NEIGHBOR_OFFSET: - _x = x + d[0] - _y = y + d[1] - if self._in_board((_x, _y)): - nei.append((_x, _y)) - return nei - - def _corner(self, vertex): - x, y = vertex - corner = [] - for d in CORNER_OFFSET: - _x = x + d[0] - _y = y + d[1] - if self._in_board((_x, _y)): - corner.append((_x, _y)) - return corner - - def _find_group(self, current_board, vertex): - color = current_board[self.game._flatten(vertex)] - # print ("color : ", color) - chain = set() - frontier = [vertex] - has_liberty = False - while frontier: - current = frontier.pop() - # print ("current : ", current) - chain.add(current) - for n in self._neighbor(current): - if current_board[self.game._flatten(n)] == color and not n in chain: - frontier.append(n) - if current_board[self.game._flatten(n)] == utils.EMPTY: - has_liberty = True - return has_liberty, chain - - def _is_suicide(self, current_board, color, vertex): - current_board[self.game._flatten(vertex)] = color # assume that we already take this move - suicide = False - - has_liberty, group = self._find_group(current_board, vertex) - if not has_liberty: - suicide = True # no liberty, suicide - for n in self._neighbor(vertex): - if current_board[self.game._flatten(n)] == utils.another_color(color): - opponent_liberty, group = self._find_group(current_board, n) - if not opponent_liberty: - suicide = False # this move is able to take opponent's stone, not suicide - - current_board[self.game._flatten(vertex)] = utils.EMPTY # undo this move - return suicide - - def _process_board(self, current_board, color, vertex): - nei = self._neighbor(vertex) - for n in nei: - if current_board[self.game._flatten(n)] == utils.another_color(color): - has_liberty, group = self._find_group(current_board, n) - if not has_liberty: - for b in group: - current_board[self.game._flatten(b)] = utils.EMPTY - - def _check_global_isomorphous(self, history_boards, current_board, color, vertex): - repeat = False - next_board = copy.copy(current_board) - next_board[self.game._flatten(vertex)] = color - self._process_board(next_board, color, vertex) - if next_board in history_boards: - repeat = True - return repeat - - def _is_eye(self, current_board, color, vertex): - nei = self._neighbor(vertex) - cor = self._corner(vertex) - ncolor = {color == current_board[self.game._flatten(n)] for n in nei} - if False in ncolor: - # print "not all neighbors are in same color with us" - return False - _, group = self._find_group(current_board, nei[0]) - if set(nei) < group: - # print "all neighbors are in same group and same color with us" - return True - else: - opponent_number = [current_board[self.game._flatten(c)] for c in cor].count(-color) - opponent_propotion = float(opponent_number) / float(len(cor)) - if opponent_propotion < 0.5: - # print "few opponents, real eye" - return True - else: - # print "many opponents, fake eye" - return False - - def _knowledge_prunning(self, current_board, color, vertex): - ### check if it is an eye of yourself - ### assumptions : notice that this judgement requires that the state is an endgame - if self._is_eye(current_board, color, vertex): - return False - return True - - def _sa2cv(self, state, action): - # State is the play board, the shape is [1, self.game.size, self.game.size, 17], action is an index. - # We need to transfer the (state, action) pair into (color, vertex) pair to simulate the move - if state[0, 0, 0, -1] == utils.BLACK: - color = utils.BLACK - else: - color = utils.WHITE - if action == self.game.size ** 2: - vertex = (0, 0) - else: - vertex = self.game._deflatten(action) - return color, vertex - - def _is_valid(self, history_boards, current_board, color, vertex): - ### in board - if not self._in_board(vertex): - return False - - ### already have stone - if not current_board[self.game._flatten(vertex)] == utils.EMPTY: - return False - - ### check if it is suicide - if self._is_suicide(current_board, color, vertex): - return False - - ### forbid global isomorphous - if self._check_global_isomorphous(history_boards, current_board, color, vertex): - return False - - return True - - def simulate_is_valid(self, history_boards, current_board, state, action): - # initialize simulate_latest_boards and simulate_board from state - self.simulate_latest_boards.clear() - for i in range(8): - self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) - self.simulate_board = copy.copy(self.simulate_latest_boards[-1]) - - color, vertex = self._sa2cv(state, action) - - if not self._is_valid(history_boards, current_board, color, vertex): - return False - - if not self._knowledge_prunning(current_board, color, vertex): - return False - - return True - - def _do_move(self, color, vertex): - if vertex == utils.PASS: - return True - - id_ = self.game._flatten(vertex) - if self.simulate_board[id_] == utils.EMPTY: - self.simulate_board[id_] = color - return True - else: - return False - - def simulate_step_forward(self, state, action): - # initialize the simulate_board from state - self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist() - - color, vertex = self._sa2cv(state, action) - - self._do_move(color, vertex) - new_state = np.concatenate( - [state[:, :, :, 1:8], (np.array(self.simulate_board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1), - state[:, :, :, 9:16], (np.array(self.simulate_board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1), - np.array(1 - state[:, :, :, -1]).reshape(1, self.game.size, self.game.size, 1)], - axis=3) - return new_state, 0