merge class strategy with class game. Next, merge Go with GoEnv

This commit is contained in:
Dong Yan 2017-12-15 22:19:44 +08:00
parent 00f599bba3
commit 6cb4b02fca
9 changed files with 244 additions and 730 deletions

View File

@ -10,3 +10,7 @@ Connecting our own policy-value neural network with leela-zero.
## checkpoints: ## checkpoints:
Weights of the policy-value neural network Weights of the policy-value neural network
## File Specification

View File

@ -188,7 +188,6 @@ class GTPEngine():
def cmd_show_board(self, args, **kwargs): def cmd_show_board(self, args, **kwargs):
return self._game.board, True return self._game.board, True
if __name__ == "main": if __name__ == "main":
game = Game() game = Game()
engine = GTPEngine(game_obj=Game) engine = GTPEngine(game_obj=Game)

View File

@ -10,242 +10,49 @@ import copy
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import sys import sys
import go
import network_small
import strategy
from collections import deque from collections import deque
from tianshou.core.mcts.mcts import MCTS
import Network import Network
from strategy import strategy #from strategy import strategy
'''
(1, 1) is considered as the upper left corner of the board,
(size, 1) is the lower left
'''
DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]]
class Executor:
    """Rule engine operating on a ``game`` object's board.

    The executor owns no state of its own: every method reads and writes
    ``self.game.board`` (a flat list indexed via ``self.game._flatten``),
    ``self.game.history`` and ``self.game.past``.  Vertices are 1-based
    ``(x, y)`` pairs.
    """

    def __init__(self, **kwargs):
        """Store the game whose board this executor manipulates.

        :param kwargs: must contain ``game``; a missing key raises KeyError.
        """
        self.game = kwargs['game']

    def _bfs(self, vertex, color, block, status, alive_break):
        """Collect the connected group of ``color`` cells containing ``vertex``.

        Despite the name this is a recursive depth-first flood fill.

        :param vertex: starting vertex; it is always added to ``block``.
        :param color: board value a neighbour must have to join the group.
        :param block: output list, extended in place with visited vertices.
        :param status: flat visited-flags list, updated in place.
        :param alive_break: unused here; only forwarded to recursive calls.
        """
        block.append(vertex)
        status[self.game._flatten(vertex)] = True
        for n in self._neighbor(vertex):
            if not status[self.game._flatten(n)]:
                if self.game.board[self.game._flatten(n)] == color:
                    self._bfs(n, color, block, status, alive_break)

    def _find_block(self, vertex, alive_break=False):
        """Find the group containing ``vertex`` and whether it has no liberty.

        :return: ``(True, block)`` when no member of the group touches an
            empty cell, otherwise ``(False, block)``.
        """
        block = []
        status = [False] * (self.game.size * self.game.size)
        color = self.game.board[self.game._flatten(vertex)]
        self._bfs(vertex, color, block, status, alive_break)

        for b in block:
            for n in self._neighbor(b):
                if self.game.board[self.game._flatten(n)] == utils.EMPTY:
                    return False, block
        return True, block

    def _find_boarder(self, vertex):
        """Return the cells adjacent to the empty region containing ``vertex``.

        A cell appears once for every region member it touches, so the
        result may contain duplicates.
        """
        block = []
        status = [False] * (self.game.size * self.game.size)
        self._bfs(vertex, utils.EMPTY, block, status, False)

        # Set lookup keeps the membership test O(1) instead of scanning the list.
        members = set(block)
        border = []
        for b in block:
            for n in self._neighbor(b):
                if n not in members:
                    border.append(n)
        return border

    def _is_qi(self, color, vertex):
        """Check that a ``color`` stone at ``vertex`` would not be a suicide.

        The stone is placed temporarily and always removed before returning.

        :return: True if the vertex has an empty neighbour, or if the stone
            leaves some adjacent opposing group without liberties; False if
            the stone's own group would have no liberty.
        """
        nei = self._neighbor(vertex)
        for n in nei:
            if self.game.board[self.game._flatten(n)] == utils.EMPTY:
                return True

        self.game.board[self.game._flatten(vertex)] = color
        for n in nei:
            if self.game.board[self.game._flatten(n)] == utils.another_color(color):
                can_kill, block = self._find_block(n)
                if can_kill:
                    self.game.board[self.game._flatten(vertex)] = utils.EMPTY
                    return True

        # No capture happened, so a group without liberties means suicide.
        can_kill, block = self._find_block(vertex)
        if can_kill:
            self.game.board[self.game._flatten(vertex)] = utils.EMPTY
            return False

        self.game.board[self.game._flatten(vertex)] = utils.EMPTY
        return True

    def _check_global_isomorphous(self, color, vertex):
        """Tell whether playing at ``vertex`` recreates a board in ``history``.

        The move and its captures are applied to the live board, the result
        is looked up in ``self.game.history``, and ``self.game.board`` is then
        rebound to a copy taken before the trial move.
        """
        _board = copy.copy(self.game.board)  # backup
        self.game.board[self.game._flatten(vertex)] = color
        self._process_board(color, vertex)
        res = self.game.board in self.game.history
        self.game.board = _board
        return res

    def _in_board(self, vertex):
        """Return True when both coordinates lie in ``1..self.game.size``."""
        x, y = vertex
        if x < 1 or x > self.game.size:
            return False
        if y < 1 or y > self.game.size:
            return False
        return True

    def _neighbor(self, vertex):
        """Return the on-board vertices reached by applying each ``DELTA`` offset."""
        x, y = vertex
        nei = []
        for d in DELTA:
            _x = x + d[0]
            _y = y + d[1]
            if self._in_board((_x, _y)):
                nei.append((_x, _y))
        return nei

    def _process_board(self, color, vertex):
        """Remove opposing groups next to ``vertex`` that have no liberty left."""
        nei = self._neighbor(vertex)
        for n in nei:
            if self.game.board[self.game._flatten(n)] == utils.another_color(color):
                can_kill, block = self._find_block(n, alive_break=True)
                if can_kill:
                    for b in block:
                        self.game.board[self.game._flatten(b)] = utils.EMPTY

    def is_valid(self, color, vertex):
        """Return True when ``color`` may play at ``vertex``.

        The checks run in order: on the board, on an empty cell, not a
        suicide, and not recreating a board stored in ``history``.
        """
        # in board
        if not self._in_board(vertex):
            return False

        # already have stone
        if not self.game.board[self.game._flatten(vertex)] == utils.EMPTY:
            return False

        # check if it is qi
        if not self._is_qi(color, vertex):
            return False

        if self._check_global_isomorphous(color, vertex):
            return False

        return True

    def do_move(self, color, vertex):
        """Play ``color`` at ``vertex`` if legal.

        On success the stone is placed, captures are removed and a copy of
        the board is appended to both ``history`` and ``past``.

        :return: False when the move is invalid (board untouched), else True.
        """
        if not self.is_valid(color, vertex):
            return False
        self.game.board[self.game._flatten(vertex)] = color
        self._process_board(color, vertex)
        self.game.history.append(copy.copy(self.game.board))
        self.game.past.append(copy.copy(self.game.board))
        return True

    def _find_empty(self):
        """Return the vertex of the first empty cell in flat board order.

        Raises IndexError when the board has no empty cell.
        """
        idx = [i for i, x in enumerate(self.game.board) if x == utils.EMPTY][0]
        return self.game._deflatten(idx)

    def get_score(self, is_unknown_estimation=False):
        """Score the current board from BLACK's perspective.

        Empty cells are filled one at a time: with BLACK or WHITE when the
        border of their empty region holds only that colour, otherwise with
        ``_predict_from_nearby`` (if ``is_unknown_estimation``) or
        ``utils.UNKNOWN``.  The board is restored before returning.

        :param is_unknown_estimation: whether to guess undecided cells from
            nearby stones.
        :return: number of BLACK cells minus number of WHITE cells minus komi.
        """
        _board = copy.copy(self.game.board)
        while utils.EMPTY in self.game.board:
            vertex = self._find_empty()
            boarder = self._find_boarder(vertex)
            boarder_color = set(map(lambda v: self.game.board[self.game._flatten(v)], boarder))
            if boarder_color == {utils.BLACK}:
                self.game.board[self.game._flatten(vertex)] = utils.BLACK
            elif boarder_color == {utils.WHITE}:
                self.game.board[self.game._flatten(vertex)] = utils.WHITE
            elif is_unknown_estimation:
                self.game.board[self.game._flatten(vertex)] = self._predict_from_nearby(vertex)
            else:
                self.game.board[self.game._flatten(vertex)] = utils.UNKNOWN
        score = 0
        for i in self.game.board:
            if i == utils.BLACK:
                score += 1
            elif i == utils.WHITE:
                score -= 1
        score -= self.game.komi

        self.game.board = _board
        return score

    def _predict_from_nearby(self, vertex, neighbor_step=3):
        """Guess the owner of ``vertex`` from the stones around it.

        For each distance ``step`` in ``1..neighbor_step`` the board values
        gathered by four diagonal walks are summed; the first non-zero sum
        decides the result.

        :param vertex: position to be estimated.
        :param neighbor_step: largest distance to inspect.
        :return: ``utils.BLACK`` for a positive sum, ``utils.WHITE`` for a
            negative one, and None when every sum is zero.
        """
        # NOTE(review): the sum works as a vote only if BLACK is stored as a
        # positive and WHITE as a negative number -- confirm against utils.
        # NOTE(review): each walk is neighbor_step long rather than step long;
        # confirm this is intended.
        for step in range(1, neighbor_step + 1):  # check the stones within the steps in range
            neighbor_vertex_set = []
            self._add_nearby_stones(neighbor_vertex_set, vertex[0] - step, vertex[1], 1, 1, neighbor_step)
            self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] + step, 1, -1, neighbor_step)
            self._add_nearby_stones(neighbor_vertex_set, vertex[0] + step, vertex[1], -1, -1, neighbor_step)
            self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] - step, -1, 1, neighbor_step)
            color_estimate = 0
            for neighbor_vertex in neighbor_vertex_set:
                color_estimate += self.game.board[self.game._flatten(neighbor_vertex)]
            if color_estimate > 0:
                return utils.BLACK
            elif color_estimate < 0:
                return utils.WHITE

    def _add_nearby_stones(self, neighbor_vertex_set, start_vertex_x, start_vertex_y, x_diff, y_diff, num_step):
        """Append the on-board vertices met on a straight walk.

        :param neighbor_vertex_set: list extended in place.
        :param start_vertex_x: x coordinate of the first vertex of the walk.
        :param start_vertex_y: y coordinate of the first vertex of the walk.
        :param x_diff: x increment applied after every step.
        :param y_diff: y increment applied after every step.
        :param num_step: number of vertices visited, the start included.
        :return: None
        """
        # range() instead of xrange() so the loop also runs on Python 3.
        for _ in range(num_step):
            new_neighbor_vertex = (start_vertex_x, start_vertex_y)
            if self._in_board(new_neighbor_vertex):
                neighbor_vertex_set.append(new_neighbor_vertex)
            start_vertex_x += x_diff
            start_vertex_y += y_diff
class Game: class Game:
'''
Load the real game and trained weights.
TODO : Maybe merge with the engine class in future,
currently leave it untouched for interacting with Go UI.
'''
def __init__(self, size=9, komi=6.5, checkpoint_path=None): def __init__(self, size=9, komi=6.5, checkpoint_path=None):
self.size = size self.size = size
self.komi = komi self.komi = komi
self.board = [utils.EMPTY] * (self.size * self.size) self.board = [utils.EMPTY] * (self.size * self.size)
self.strategy = strategy(checkpoint_path)
# self.strategy = None
self.executor = Executor(game=self)
self.history = [] self.history = []
self.past = deque(maxlen=8) self.past = deque(maxlen=8)
for _ in range(8): for _ in range(8):
self.past.append(self.board) self.past.append(self.board)
self.executor = go.Go(game=self)
#self.strategy = strategy(checkpoint_path)
self.simulator = strategy.GoEnv()
self.net = network_small.Network()
self.sess = self.net.forward(checkpoint_path)
self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v],
feed_dict={self.net.x: state, self.net.is_training: False})
def _flatten(self, vertex): def _flatten(self, vertex):
x, y = vertex x, y = vertex
return (y - 1) * self.size + (x - 1) return (y - 1) * self.size + (x - 1)
def _deflatten(self, idx): def _deflatten(self, idx):
x = idx % self.size + 1 x = idx % self.size + 1
y = idx // self.size + 1 y = idx // self.size + 1
return (x,y) return (x,y)
def clear(self): def clear(self):
self.board = [utils.EMPTY] * (self.size * self.size) self.board = [utils.EMPTY] * (self.size * self.size)
self.history = [] self.history = []
@ -259,8 +66,30 @@ class Game:
def set_komi(self, k): def set_komi(self, k):
self.komi = k self.komi = k
def check_valid(self, color, vertex): def data_process(self, history, color):
return self.executor.is_valid(color, vertex) state = np.zeros([1, self.simulator.size, self.simulator.size, 17])
for i in range(8):
state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.simulator.size ** 2)).reshape(self.simulator.size, self.simulator.size)
state[0, :, :, i + 8] = np.array(np.array(history[i]) == -np.ones(self.simulator.size ** 2)).reshape(self.simulator.size, self.simulator.size)
if color == utils.BLACK:
state[0, :, :, 16] = np.ones([self.simulator.size, self.simulator.size])
if color == utils.WHITE:
state[0, :, :, 16] = np.zeros([self.simulator.size, self.simulator.size])
return state
def strategy_gen_move(self, history, color):
    """Sample a move for ``color`` from the MCTS visit counts.

    The simulator is loaded with a copy of ``history`` (its last entry
    becomes the simulator board), a search is built from the encoded
    state, and one action is drawn with probability proportional to the
    root visit counts ``mcts.root.N``.

    :param history: sequence of flat boards, most recent last.
    :param color: colour to move.
    :return: ``(move, prob)`` where ``move`` is ``utils.PASS`` or a 1-based
        ``(x, y)`` vertex and ``prob`` is the sampling distribution over
        ``size ** 2 + 1`` actions (the last index being the pass).
    """
    self.simulator.history = copy.copy(history)
    self.simulator.board = copy.copy(history[-1])
    state = self.data_process(self.simulator.history, color)
    mcts = MCTS(self.simulator, self.evaluator, state, self.simulator.size ** 2 + 1, inverse=True, max_step=10)
    temp = 1
    prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
    choice = np.random.choice(self.simulator.size ** 2 + 1, 1, p=prob).tolist()[0]
    if choice == self.simulator.size ** 2:
        move = utils.PASS
    else:
        # Floor division keeps the row an int under true division as well;
        # the mapping mirrors _deflatten (x = idx % size + 1, y = idx // size + 1).
        move = (choice % self.simulator.size + 1, choice // self.simulator.size + 1)
    return move, prob
def do_move(self, color, vertex): def do_move(self, color, vertex):
if vertex == utils.PASS: if vertex == utils.PASS:
@ -271,7 +100,7 @@ class Game:
def gen_move(self, color): def gen_move(self, color):
# move = self.strategy.gen_move(color) # move = self.strategy.gen_move(color)
# return move # return move
move, self.prob = self.strategy.gen_move(self.past, color) move, self.prob = self.strategy_gen_move(self.past, color)
self.do_move(color, move) self.do_move(color, move)
return move return move
@ -295,7 +124,6 @@ class Game:
print('') print('')
sys.stdout.flush() sys.stdout.flush()
if __name__ == "__main__": if __name__ == "__main__":
g = Game() g = Game()
g.show_board() g.show_board()

View File

@ -1,428 +1,212 @@
''' from __future__ import print_function
A board is a NxN numpy array. import utils
A Coordinate is a tuple index into the board.
A Move is a (Coordinate c | None).
A PlayerMove is a (Color, Move) tuple
(0, 0) is considered to be the upper left corner of the board, and (18, 0) is the lower left.
'''
from collections import namedtuple
import copy import copy
import itertools import sys
from collections import deque
import numpy as np '''
Settings of the Go game.
# Represent a board as a numpy array, with 0 empty, 1 is black, -1 is white. (1, 1) is considered as the upper left corner of the board,
# This means that swapping colors is as simple as multiplying array by -1. (size, 1) is the lower left
WHITE, EMPTY, BLACK, FILL, KO, UNKNOWN = range(-1, 5) '''
NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]]
class PlayerMove(namedtuple('PlayerMove', ['color', 'move'])): pass class Go:
def __init__(self, **kwargs):
self.game = kwargs['game']
def _bfs(self, vertex, color, block, status, alive_break):
block.append(vertex)
status[self.game._flatten(vertex)] = True
nei = self._neighbor(vertex)
for n in nei:
if not status[self.game._flatten(n)]:
if self.game.board[self.game._flatten(n)] == color:
self._bfs(n, color, block, status, alive_break)
# Represents "group not found" in the LibertyTracker object def _find_block(self, vertex, alive_break=False):
MISSING_GROUP_ID = -1 block = []
status = [False] * (self.game.size * self.game.size)
color = self.game.board[self.game._flatten(vertex)]
self._bfs(vertex, color, block, status, alive_break)
for b in block:
for n in self._neighbor(b):
if self.game.board[self.game._flatten(n)] == utils.EMPTY:
return False, block
return True, block
class IllegalMove(Exception): pass def _find_boarder(self, vertex):
block = []
status = [False] * (self.game.size * self.game.size)
self._bfs(vertex, utils.EMPTY, block, status, False)
border = []
for b in block:
for n in self._neighbor(b):
if not (n in block):
border.append(n)
return border
def _is_qi(self, color, vertex):
nei = self._neighbor(vertex)
for n in nei:
if self.game.board[self.game._flatten(n)] == utils.EMPTY:
return True
# these are initialized by set_board_size self.game.board[self.game._flatten(vertex)] = color
N = None for n in nei:
ALL_COORDS = [] if self.game.board[self.game._flatten(n)] == utils.another_color(color):
EMPTY_BOARD = None can_kill, block = self._find_block(n)
NEIGHBORS = {} if can_kill:
DIAGONALS = {} self.game.board[self.game._flatten(vertex)] = utils.EMPTY
return True
### can not suicide
def set_board_size(n): can_kill, block = self._find_block(vertex)
''' if can_kill:
Hopefully nobody tries to run both 9x9 and 19x19 game instances at once. self.game.board[self.game._flatten(vertex)] = utils.EMPTY
Also, never do "from go import N, W, ALL_COORDS, EMPTY_BOARD".
'''
global N, ALL_COORDS, EMPTY_BOARD, NEIGHBORS, DIAGONALS
if N == n: return
N = n
ALL_COORDS = [(i, j) for i in range(n) for j in range(n)]
EMPTY_BOARD = np.zeros([n, n], dtype=np.int8)
def check_bounds(c):
return c[0] % n == c[0] and c[1] % n == c[1]
NEIGHBORS = {(x, y): list(filter(check_bounds, [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)])) for x, y in
ALL_COORDS}
DIAGONALS = {(x, y): list(filter(check_bounds, [(x + 1, y + 1), (x + 1, y - 1), (x - 1, y + 1), (x - 1, y - 1)]))
for x, y in ALL_COORDS}
def place_stones(board, color, stones):
for s in stones:
board[s] = color
def find_reached(board, c):
# that can reach from one place
color = board[c]
chain = set([c])
reached = set()
frontier = [c]
while frontier:
current = frontier.pop()
chain.add(current)
for n in NEIGHBORS[current]:
if board[n] == color and (not n in chain):
frontier.append(n)
elif board[n] != color:
reached.add(n)
return chain, reached
def is_koish(board, c):
'Check if c is surrounded on all sides by 1 color, and return that color'
if board[c] != EMPTY: return None
neighbors = {board[n] for n in NEIGHBORS[c]}
if len(neighbors) == 1 and not EMPTY in neighbors:
return list(neighbors)[0]
else:
return None
def is_eyeish(board, c):
'Check if c is an eye, for the purpose of restricting MC rollouts.'
color = is_koish(board, c)
if color is None:
return None
diagonal_faults = 0
diagonals = DIAGONALS[c]
if len(diagonals) < 4:
diagonal_faults += 1
for d in diagonals:
if not board[d] in (color, EMPTY):
diagonal_faults += 1
if diagonal_faults > 1:
return None
else:
return color
class Group(namedtuple('Group', ['id', 'stones', 'liberties', 'color'])):
'''
stones: a set of Coordinates belonging to this group
liberties: a set of Coordinates that are empty and adjacent to this group.
color: color of this group
'''
def __eq__(self, other):
return self.stones == other.stones and self.liberties == other.liberties and self.color == other.color
class LibertyTracker(object):
@staticmethod
def from_board(board):
board = np.copy(board)
curr_group_id = 0
lib_tracker = LibertyTracker()
for color in (WHITE, BLACK):
while color in board:
curr_group_id += 1
found_color = np.where(board == color)
coord = found_color[0][0], found_color[1][0]
chain, reached = find_reached(board, coord)
liberties = set(r for r in reached if board[r] == EMPTY)
new_group = Group(curr_group_id, chain, liberties, color)
lib_tracker.groups[curr_group_id] = new_group
for s in chain:
lib_tracker.group_index[s] = curr_group_id
place_stones(board, FILL, chain)
lib_tracker.max_group_id = curr_group_id
liberty_counts = np.zeros([N, N], dtype=np.uint8)
for group in lib_tracker.groups.values():
num_libs = len(group.liberties)
for s in group.stones:
liberty_counts[s] = num_libs
lib_tracker.liberty_cache = liberty_counts
return lib_tracker
def __init__(self, group_index=None, groups=None, liberty_cache=None, max_group_id=1):
# group_index: a NxN numpy array of group_ids. -1 means no group
# groups: a dict of group_id to groups
# liberty_cache: a NxN numpy array of liberty counts
self.group_index = group_index if group_index is not None else -np.ones([N, N], dtype=np.int32)
self.groups = groups or {}
self.liberty_cache = liberty_cache if liberty_cache is not None else np.zeros([N, N], dtype=np.uint8)
self.max_group_id = max_group_id
def __deepcopy__(self, memodict={}):
new_group_index = np.copy(self.group_index)
new_lib_cache = np.copy(self.liberty_cache)
new_groups = {
group.id: Group(group.id, set(group.stones), set(group.liberties), group.color)
for group in self.groups.values()
}
return LibertyTracker(new_group_index, new_groups, liberty_cache=new_lib_cache, max_group_id=self.max_group_id)
def add_stone(self, color, c):
assert self.group_index[c] == MISSING_GROUP_ID
captured_stones = set()
opponent_neighboring_group_ids = set()
friendly_neighboring_group_ids = set()
empty_neighbors = set()
for n in NEIGHBORS[c]:
neighbor_group_id = self.group_index[n]
if neighbor_group_id != MISSING_GROUP_ID:
neighbor_group = self.groups[neighbor_group_id]
if neighbor_group.color == color:
friendly_neighboring_group_ids.add(neighbor_group_id)
else:
opponent_neighboring_group_ids.add(neighbor_group_id)
else:
empty_neighbors.add(n)
new_group = self._create_group(color, c, empty_neighbors)
for group_id in friendly_neighboring_group_ids:
new_group = self._merge_groups(group_id, new_group.id)
for group_id in opponent_neighboring_group_ids:
neighbor_group = self.groups[group_id]
if len(neighbor_group.liberties) == 1:
captured = self._capture_group(group_id)
captured_stones.update(captured)
else:
self._update_liberties(group_id, remove={c})
self._handle_captures(captured_stones)
# suicide is illegal
if len(new_group.liberties) == 0:
raise IllegalMove("Move at {} would commit suicide!\n".format(c))
return captured_stones
def _create_group(self, color, c, liberties):
self.max_group_id += 1
new_group = Group(self.max_group_id, set([c]), liberties, color)
self.groups[new_group.id] = new_group
self.group_index[c] = new_group.id
self.liberty_cache[c] = len(liberties)
return new_group
def _merge_groups(self, group1_id, group2_id):
group1 = self.groups[group1_id]
group2 = self.groups[group2_id]
group1.stones.update(group2.stones)
del self.groups[group2_id]
for s in group2.stones:
self.group_index[s] = group1_id
self._update_liberties(group1_id, add=group2.liberties, remove=(group2.stones | group1.stones))
return group1
def _capture_group(self, group_id):
dead_group = self.groups[group_id]
del self.groups[group_id]
for s in dead_group.stones:
self.group_index[s] = MISSING_GROUP_ID
self.liberty_cache[s] = 0
return dead_group.stones
def _update_liberties(self, group_id, add=None, remove=None):
group = self.groups[group_id]
if add:
group.liberties.update(add)
if remove:
group.liberties.difference_update(remove)
new_lib_count = len(group.liberties)
for s in group.stones:
self.liberty_cache[s] = new_lib_count
def _handle_captures(self, captured_stones):
for s in captured_stones:
for n in NEIGHBORS[s]:
group_id = self.group_index[n]
if group_id != MISSING_GROUP_ID:
self._update_liberties(group_id, add={s})
class Position():
def __init__(self, board=None, n=0, komi=7.5, caps=(0, 0), lib_tracker=None, ko=None, recent=tuple(),
to_play=BLACK):
'''
board: a numpy array
n: an int representing moves played so far
komi: a float, representing points given to the second player.
caps: a (int, int) tuple of captures for B, W.
lib_tracker: a LibertyTracker object
ko: a Move
recent: a tuple of PlayerMoves, such that recent[-1] is the last move.
to_play: BLACK or WHITE
'''
self.board = board if board is not None else np.copy(EMPTY_BOARD)
self.n = n
self.komi = komi
self.caps = caps
self.lib_tracker = lib_tracker or LibertyTracker.from_board(self.board)
self.ko = ko
self.recent = recent
self.to_play = to_play
def __deepcopy__(self, memodict={}):
new_board = np.copy(self.board)
new_lib_tracker = copy.deepcopy(self.lib_tracker)
return Position(new_board, self.n, self.komi, self.caps, new_lib_tracker, self.ko, self.recent, self.to_play)
def __str__(self):
pretty_print_map = {
WHITE: '\x1b[0;31;47mO',
EMPTY: '\x1b[0;31;43m.',
BLACK: '\x1b[0;31;40mX',
FILL: '#',
KO: '*',
}
board = np.copy(self.board)
captures = self.caps
if self.ko is not None:
place_stones(board, KO, [self.ko])
raw_board_contents = []
for i in range(N):
row = []
for j in range(N):
appended = '<' if (self.recent and (i, j) == self.recent[-1].move) else ' '
row.append(pretty_print_map[board[i, j]] + appended)
row.append('\x1b[0m')
raw_board_contents.append(''.join(row))
row_labels = ['%2d ' % i for i in range(N, 0, -1)]
annotated_board_contents = [''.join(r) for r in zip(row_labels, raw_board_contents, row_labels)]
header_footer_rows = [' ' + ' '.join('ABCDEFGHJKLMNOPQRST'[:N]) + ' ']
annotated_board = '\n'.join(itertools.chain(header_footer_rows, annotated_board_contents, header_footer_rows))
details = "\nMove: {}. Captures X: {} O: {}\n".format(self.n, *captures)
return annotated_board + details
def is_move_suicidal(self, move):
potential_libs = set()
for n in NEIGHBORS[move]:
neighbor_group_id = self.lib_tracker.group_index[n]
if neighbor_group_id == MISSING_GROUP_ID:
# at least one liberty after playing here, so not a suicide
return False
neighbor_group = self.lib_tracker.groups[neighbor_group_id]
if neighbor_group.color == self.to_play:
potential_libs |= neighbor_group.liberties
elif len(neighbor_group.liberties) == 1:
# would capture an opponent group if they only had one lib.
return False
# it's possible to suicide by connecting several friendly groups
# each of which had one liberty.
potential_libs -= set([move])
return not potential_libs
def is_move_legal(self, move):
'Checks that a move is on an empty space, not on ko, and not suicide'
if move is None:
return True
if self.board[move] != EMPTY:
return False return False
if move == self.ko:
self.game.board[self.game._flatten(vertex)] = utils.EMPTY
return True
def _check_global_isomorphous(self, color, vertex):
##backup
_board = copy.copy(self.game.board)
self.game.board[self.game._flatten(vertex)] = color
self._process_board(color, vertex)
if self.game.board in self.game.history:
res = True
else:
res = False
self.game.board = _board
return res
def _in_board(self, vertex):
x, y = vertex
if x < 1 or x > self.game.size: return False
if y < 1 or y > self.game.size: return False
return True
def _neighbor(self, vertex):
x, y = vertex
nei = []
for d in NEIGHBOR_OFFSET:
_x = x + d[0]
_y = y + d[1]
if self._in_board((_x, _y)):
nei.append((_x, _y))
return nei
def _process_board(self, color, vertex):
nei = self._neighbor(vertex)
for n in nei:
if self.game.board[self.game._flatten(n)] == utils.another_color(color):
can_kill, block = self._find_block(n, alive_break=True)
if can_kill:
for b in block:
self.game.board[self.game._flatten(b)] = utils.EMPTY
def is_valid(self, color, vertex):
### in board
if not self._in_board(vertex):
return False return False
if self.is_move_suicidal(move):
### already have stone
if not self.game.board[self.game._flatten(vertex)] == utils.EMPTY:
return False
### check if it is qi
if not self._is_qi(color, vertex):
return False
if self._check_global_isomorphous(color, vertex):
return False return False
return True return True
def pass_move(self, mutate=False): def do_move(self, color, vertex):
pos = self if mutate else copy.deepcopy(self) if not self.is_valid(color, vertex):
pos.n += 1 return False
pos.recent += (PlayerMove(pos.to_play, None),) self.game.board[self.game._flatten(vertex)] = color
pos.to_play *= -1 self._process_board(color, vertex)
pos.ko = None self.game.history.append(copy.copy(self.game.board))
return pos self.game.past.append(copy.copy(self.game.board))
return True
def flip_playerturn(self, mutate=False): def _find_empty(self):
pos = self if mutate else copy.deepcopy(self) idx = [i for i,x in enumerate(self.game.board) if x == utils.EMPTY ][0]
pos.ko = None return self.game._deflatten(idx)
pos.to_play *= -1
return pos
def get_liberties(self): def get_score(self, is_unknown_estimation = False):
return self.lib_tracker.liberty_cache '''
is_unknown_estimation: whether use nearby stone to predict the unknown
def play_move(self, c, color=None, mutate=False): return score from BLACK perspective.
# Obeys CGOS Rules of Play. In short: '''
# No suicides _board = copy.copy(self.game.board)
# Chinese/area scoring while utils.EMPTY in self.game.board:
# Positional superko (this is very crudely approximate at the moment.) vertex = self._find_empty()
if color is None: boarder = self._find_boarder(vertex)
color = self.to_play boarder_color = set(map(lambda v: self.game.board[self.game._flatten(v)], boarder))
if boarder_color == {utils.BLACK}:
pos = self if mutate else copy.deepcopy(self) self.game.board[self.game._flatten(vertex)] = utils.BLACK
elif boarder_color == {utils.WHITE}:
if c is None: self.game.board[self.game._flatten(vertex)] = utils.WHITE
pos = pos.pass_move(mutate=mutate) elif is_unknown_estimation:
return pos self.game.board[self.game._flatten(vertex)] = self._predict_from_nearby(vertex)
if not self.is_move_legal(c):
raise IllegalMove("Move at {} is illegal: \n{}".format(c, self))
# check must be done before potentially mutating the board
potential_ko = is_koish(self.board, c)
place_stones(pos.board, color, [c])
captured_stones = pos.lib_tracker.add_stone(color, c)
place_stones(pos.board, EMPTY, captured_stones)
opp_color = color * -1
if len(captured_stones) == 1 and potential_ko == opp_color:
new_ko = list(captured_stones)[0]
else:
new_ko = None
if pos.to_play == BLACK:
new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
else:
new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))
pos.n += 1
pos.caps = new_caps
pos.ko = new_ko
pos.recent += (PlayerMove(color, c),)
pos.to_play *= -1
return pos
def score(self):
'Return score from B perspective. If W is winning, score is negative.'
working_board = np.copy(self.board)
while EMPTY in working_board:
unassigned_spaces = np.where(working_board == EMPTY)
c = unassigned_spaces[0][0], unassigned_spaces[1][0]
territory, borders = find_reached(working_board, c)
border_colors = set(working_board[b] for b in borders)
X_border = BLACK in border_colors
O_border = WHITE in border_colors
if X_border and not O_border:
territory_color = BLACK
elif O_border and not X_border:
territory_color = WHITE
else: else:
territory_color = UNKNOWN # dame, or seki self.game.board[self.game._flatten(vertex)] =utils.UNKNOWN
place_stones(working_board, territory_color, territory) score = 0
for i in self.game.board:
if i == utils.BLACK:
score += 1
elif i == utils.WHITE:
score -= 1
score -= self.game.komi
return np.count_nonzero(working_board == BLACK) - np.count_nonzero(working_board == WHITE) - self.komi self.game.board = _board
return score
def result(self): def _predict_from_nearby(self, vertex, neighbor_step = 3):
score = self.score() '''
if score > 0: step: the nearby 3 steps is considered
return 'B+' + '%.1f' % score :vertex: position to be estimated
elif score < 0: :neighbor_step: how many steps nearby
return 'W+' + '%.1f' % abs(score) :return: the nearby positions of the input position
else: currently the nearby 3*3 grid is returned, altogether 4*8 points involved
return 'DRAW' '''
for step in range(1, neighbor_step + 1): # check the stones within the steps in range
neighbor_vertex_set = []
self._add_nearby_stones(neighbor_vertex_set, vertex[0] - step, vertex[1], 1, 1, neighbor_step)
self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] + step, 1, -1, neighbor_step)
self._add_nearby_stones(neighbor_vertex_set, vertex[0] + step, vertex[1], -1, -1, neighbor_step)
self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] - step, -1, 1, neighbor_step)
color_estimate = 0
for neighbor_vertex in neighbor_vertex_set:
color_estimate += self.game.board[self.game._flatten(neighbor_vertex)]
if color_estimate > 0:
return utils.BLACK
elif color_estimate < 0:
return utils.WHITE
def _add_nearby_stones(self, neighbor_vertex_set, start_vertex_x, start_vertex_y, x_diff, y_diff, num_step):
set_board_size(19) '''
add the nearby stones around the input vertex
:param neighbor_vertex_set: input list
:param start_vertex_x: x axis of the input vertex
:param start_vertex_y: y axis of the input vertex
:param x_diff: add x axis
:param y_diff: add y axis
:param num_step: number of steps to be added
:return:
'''
for step in xrange(num_step):
new_neighbor_vertex = (start_vertex_x, start_vertex_y)
if self._in_board(new_neighbor_vertex):
neighbor_vertex_set.append((start_vertex_x, start_vertex_y))
start_vertex_x += x_diff
start_vertex_y += y_diff

View File

@ -1,70 +0,0 @@
import gtp
import go
import utils
def translate_gtp_colors(gtp_color):
    """Convert a ``gtp`` colour constant to the matching ``go`` constant.

    Values equal to neither ``gtp.BLACK`` nor ``gtp.WHITE`` map to
    ``go.EMPTY``.
    """
    if gtp_color == gtp.BLACK:
        return go.BLACK
    if gtp_color == gtp.WHITE:
        return go.WHITE
    return go.EMPTY
class GtpInterface(object):
    """Adapter between GTP commands and a ``go.Position``.

    Subclasses must implement ``suggest_move``.
    """

    def __init__(self):
        """Start with a 9x9 size setting, komi 6.5 and a fresh position."""
        self.size = 9
        self.position = None
        self.komi = 6.5
        self.clear()

    def set_size(self, n):
        """Record the new size, update ``go``'s board size and reset the position."""
        self.size = n
        go.set_board_size(n)
        self.clear()

    def set_komi(self, komi):
        """Store ``komi`` on the interface and on the current position."""
        self.komi = komi
        self.position.komi = komi

    def clear(self):
        """Replace the current position with a new one using the stored komi."""
        self.position = go.Position(komi=self.komi)

    def accomodate_out_of_turn(self, color):
        """Flip the side to move when ``color`` is not the one expected to play."""
        if translate_gtp_colors(color) != self.position.to_play:
            self.position.flip_playerturn(mutate=True)

    def make_move(self, color, vertex):
        """Play ``vertex`` for ``color``.

        :return: False when ``go.IllegalMove`` is raised, True otherwise.
        """
        coords = utils.parse_pygtp_coords(vertex)
        self.accomodate_out_of_turn(color)
        try:
            self.position = self.position.play_move(coords, color=translate_gtp_colors(color))
        except go.IllegalMove:
            return False
        return True

    def get_move(self, color):
        """Return ``gtp.RESIGN``, ``gtp.PASS`` or the suggested move in GTP coordinates."""
        self.accomodate_out_of_turn(color)
        if self.should_resign(self.position):
            return gtp.RESIGN

        if self.should_pass(self.position):
            return gtp.PASS

        move = self.suggest_move(self.position)
        return utils.unparse_pygtp_coords(move)

    def should_resign(self, position):
        """Return ``gtp.RESIGN`` when ``caps[1]`` exceeds ``caps[0]`` by more than 50, else None."""
        if position.caps[0] + 50 < position.caps[1]:
            return gtp.RESIGN
        return None

    def should_pass(self, position):
        """Return a truthy value when more than 100 moves were played and the last one was a pass."""
        # Pass if the opponent passes
        return position.n > 100 and position.recent and position.recent[-1].move is None

    def get_score(self):
        """Return the result of the current position."""
        return self.position.result()

    def suggest_move(self, position):
        """Choose a move for ``position``; must be provided by subclasses."""
        raise NotImplementedError

View File

@ -13,12 +13,11 @@ print "Start Name Sever : " + str(start_new_server.pid)# + str(start_new_server.
time.sleep(1) time.sleep(1)
agent_v0 = subprocess.Popen(['python', '-u', 'player.py', '--role=black'], agent_v0 = subprocess.Popen(['python', '-u', 'player.py', '--role=black'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
time.sleep(3)
print "Start Player 0 at : " + str(agent_v0.pid) print "Start Player 0 at : " + str(agent_v0.pid)
agent_v1 = subprocess.Popen(['python', '-u', 'player.py', '--role=white', '--checkpoint_path=./checkpoints_origin/'], agent_v1 = subprocess.Popen(['python', '-u', 'player.py', '--role=white', '--checkpoint_path=./checkpoints_origin/'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
time.sleep(3)
print "Start Player 1 at : " + str(agent_v1.pid) print "Start Player 1 at : " + str(agent_v1.pid)
time.sleep(5)
player = [None] * 2 player = [None] * 2
player[0] = Pyro4.Proxy("PYRONAME:black") player[0] = Pyro4.Proxy("PYRONAME:black")

View File

@ -8,6 +8,10 @@ from engine import GTPEngine
@Pyro4.expose @Pyro4.expose
class Player(object): class Player(object):
"""
This class defines the object exposed through Pyro4 (Python Remote Objects).
It passes each command to our engine and returns the result.
"""
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.role = kwargs['role'] self.role = kwargs['role']
self.engine = kwargs['engine'] self.engine = kwargs['engine']

View File

@ -13,7 +13,6 @@ from tianshou.core.mcts.mcts import MCTS
DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]] DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]]
CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]] CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]]
class GoEnv: class GoEnv:
def __init__(self, size=9, komi=6.5): def __init__(self, size=9, komi=6.5):
self.size = size self.size = size
@ -221,37 +220,3 @@ class GoEnv:
np.array(1 - state[:, :, :, -1]).reshape(1, self.size, self.size, 1)], np.array(1 - state[:, :, :, -1]).reshape(1, self.size, self.size, 1)],
axis=3) axis=3)
return new_state, 0 return new_state, 0
class strategy(object):
    """Move generator combining a ``GoEnv`` simulator, a network and MCTS."""

    def __init__(self, checkpoint_path):
        """Create the simulator and network and build the evaluator.

        :param checkpoint_path: forwarded to ``Network.forward``, whose
            return value is used as the session for evaluation.
        """
        self.simulator = GoEnv()
        self.net = network_small.Network()
        self.sess = self.net.forward(checkpoint_path)
        # Returns [softmax(policy), value] for a batch of encoded states.
        self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v],
                                                     feed_dict={self.net.x: state, self.net.is_training: False})

    def data_process(self, history, color):
        """Encode the first eight boards of ``history`` plus the side to move.

        :param history: indexable collection of flat boards; entries 0..7 are read.
        :param color: ``utils.BLACK`` or ``utils.WHITE``.
        :return: array of shape ``(1, size, size, 17)``: planes 0-7 mark cells
            equal to 1, planes 8-15 mark cells equal to -1, and plane 16 is
            all ones for BLACK and all zeros for WHITE.
        """
        size = self.simulator.size
        state = np.zeros([1, size, size, 17])
        for i in range(8):
            stones = np.array(history[i])
            # Comparing with a scalar gives the same mask as comparing with
            # an array of ones, without building that array.
            state[0, :, :, i] = (stones == 1).reshape(size, size)
            state[0, :, :, i + 8] = (stones == -1).reshape(size, size)
        if color == utils.BLACK:
            state[0, :, :, 16] = np.ones([size, size])
        if color == utils.WHITE:
            state[0, :, :, 16] = np.zeros([size, size])
        return state

    def gen_move(self, history, color):
        """Sample a move for ``color`` from the MCTS root visit counts.

        :param history: sequence of flat boards, most recent last.
        :param color: colour to move.
        :return: ``(move, prob)`` where ``move`` is ``utils.PASS`` or a 1-based
            ``(x, y)`` vertex and ``prob`` is the distribution sampled from.
        """
        self.simulator.history = copy.copy(history)
        self.simulator.board = copy.copy(history[-1])
        state = self.data_process(self.simulator.history, color)
        mcts = MCTS(self.simulator, self.evaluator, state, self.simulator.size ** 2 + 1, inverse=True, max_step=10)
        temp = 1
        prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
        choice = np.random.choice(self.simulator.size ** 2 + 1, 1, p=prob).tolist()[0]
        if choice == self.simulator.size ** 2:
            move = utils.PASS
        else:
            # Floor division keeps the row coordinate an int even when "/"
            # performs true division.
            move = (choice % self.simulator.size + 1, choice // self.simulator.size + 1)
        return move, prob

View File

@ -168,6 +168,7 @@ class MCTS(object):
if max_step is None and max_time is None: if max_step is None and max_time is None:
raise ValueError("Need a stop criteria!") raise ValueError("Need a stop criteria!")
# TODO: running mcts should be implemented in another function, e.g. def search(self, max_step, max_time)
self.select_time = [] self.select_time = []
self.evaluate_time = [] self.evaluate_time = []
self.bp_time = [] self.bp_time = []