Merge Go and GoEnv, finally!

This commit is contained in:
Dong Yan 2017-12-20 01:14:05 +08:00
parent 0456e0c15e
commit db40994e11
5 changed files with 108 additions and 217 deletions

View File

@ -183,7 +183,7 @@ class GTPEngine():
return 'unknown player', False return 'unknown player', False
def cmd_get_score(self, args, **kwargs): def cmd_get_score(self, args, **kwargs):
return self._game.executor.executor_get_score(), None return self._game.game_engine.executor_get_score(), None
def cmd_show_board(self, args, **kwargs): def cmd_show_board(self, args, **kwargs):
return self._game.board, True return self._game.board, True

View File

@ -9,16 +9,13 @@ import utils
import copy import copy
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import sys import sys, os
import go import go
import network_small import network_small
import strategy
from collections import deque from collections import deque
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
from tianshou.core.mcts.mcts import MCTS from tianshou.core.mcts.mcts import MCTS
import Network
#from strategy import strategy
class Game: class Game:
''' '''
Load the real game and trained weights. Load the real game and trained weights.
@ -34,15 +31,11 @@ class Game:
self.latest_boards = deque(maxlen=8) self.latest_boards = deque(maxlen=8)
for _ in range(8): for _ in range(8):
self.latest_boards.append(self.board) self.latest_boards.append(self.board)
self.executor = go.Go(game=self)
#self.strategy = strategy(checkpoint_path)
self.simulator = strategy.GoEnv(game=self)
self.net = network_small.Network() self.net = network_small.Network()
self.sess = self.net.forward(checkpoint_path) self.sess = self.net.forward(checkpoint_path)
self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v],
feed_dict={self.net.x: state, self.net.is_training: False}) feed_dict={self.net.x: state, self.net.is_training: False})
self.game_engine = go.Go(game=self)
def _flatten(self, vertex): def _flatten(self, vertex):
x, y = vertex x, y = vertex
@ -79,10 +72,10 @@ class Game:
def think(self, latest_boards, color): def think(self, latest_boards, color):
# TODO : using copy is right, or should we change to deepcopy? # TODO : using copy is right, or should we change to deepcopy?
self.simulator.simulate_latest_boards = copy.copy(latest_boards) self.game_engine.simulate_latest_boards = copy.copy(latest_boards)
self.simulator.simulate_board = copy.copy(latest_boards[-1]) self.game_engine.simulate_board = copy.copy(latest_boards[-1])
nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color) nn_input = self.generate_nn_input(self.game_engine.simulate_latest_boards, color)
mcts = MCTS(self.simulator, self.evaluator, nn_input, self.size ** 2 + 1, inverse=True, max_step=1) mcts = MCTS(self.game_engine, self.evaluator, nn_input, self.size ** 2 + 1, inverse=True, max_step=1)
temp = 1 temp = 1
prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
choice = np.random.choice(self.size ** 2 + 1, 1, p=prob).tolist()[0] choice = np.random.choice(self.size ** 2 + 1, 1, p=prob).tolist()[0]
@ -96,7 +89,7 @@ class Game:
# this function can be called directly to play the opponent's move # this function can be called directly to play the opponent's move
if vertex == utils.PASS: if vertex == utils.PASS:
return True return True
res = self.executor.executor_do_move(color, vertex) res = self.game_engine.executor_do_move(color, vertex)
return res return res
def think_play_move(self, color): def think_play_move(self, color):

View File

@ -1,7 +1,7 @@
from __future__ import print_function from __future__ import print_function
import utils import utils
import copy import copy
import sys import numpy as np
from collections import deque from collections import deque
''' '''
@ -12,10 +12,13 @@ Settings of the Go game.
''' '''
NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]] NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]]
CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]]
class Go: class Go:
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.game = kwargs['game'] self.game = kwargs['game']
self.simulate_board = [utils.EMPTY] * (self.game.size ** 2)
self.simulate_latest_boards = deque(maxlen=8)
def _in_board(self, vertex): def _in_board(self, vertex):
x, y = vertex x, y = vertex
@ -33,6 +36,16 @@ class Go:
nei.append((_x, _y)) nei.append((_x, _y))
return nei return nei
def _corner(self, vertex):
    """Return the diagonal neighbours of ``vertex`` that lie on the board.

    Every offset in ``CORNER_OFFSET`` is added to ``vertex``; candidates
    rejected by ``self._in_board`` are dropped. The result keeps the order
    of ``CORNER_OFFSET``.
    """
    row, col = vertex
    candidates = ((row + d_row, col + d_col) for d_row, d_col in CORNER_OFFSET)
    return [point for point in candidates if self._in_board(point)]
def _find_group(self, current_board, vertex): def _find_group(self, current_board, vertex):
color = current_board[self.game._flatten(vertex)] color = current_board[self.game._flatten(vertex)]
# print ("color : ", color) # print ("color : ", color)
@ -84,6 +97,47 @@ class Go:
repeat = True repeat = True
return repeat return repeat
def _is_eye(self, current_board, color, vertex):
    """Decide whether ``vertex`` is treated as an eye of ``color``.

    Returns False as soon as any on-board orthogonal neighbour does not hold
    ``color``. Otherwise returns True when all neighbours are a proper subset
    of one group, or when fewer than half of the on-board diagonal points
    hold ``-color``.
    """
    adjacent = self._neighbor(vertex)
    diagonal = self._corner(vertex)
    flatten = self.game._flatten

    # Every orthogonal neighbour must already be ours.
    if not all(color == current_board[flatten(n)] for n in adjacent):
        return False

    # All neighbours connected into a single group: a real eye.
    _, group = self._find_group(current_board, adjacent[0])
    if set(adjacent) < group:
        return True

    # Otherwise judge by how many diagonal points the opponent occupies.
    opponent_count = sum(1 for c in diagonal if current_board[flatten(c)] == -color)
    opponent_ratio = float(opponent_count) / float(len(diagonal))
    return opponent_ratio < 0.5
def _knowledge_prunning(self, current_board, color, vertex):
    """Return False when the move should be pruned, True otherwise.

    The only rule applied is "do not fill your own eye", as decided by
    ``self._is_eye``. Per the original author's note, this judgement assumes
    the position is an endgame.
    """
    return not self._is_eye(current_board, color, vertex)
def _sa2cv(self, state, action):
    """Translate a ``(state, action)`` pair into a ``(color, vertex)`` pair.

    ``state`` is the network input (documented by the original author as
    shaped ``[1, size, size, 17]``); its last plane selects the player.
    ``action`` is a flat index, where ``size ** 2`` maps to the vertex
    ``(0, 0)`` and anything else goes through ``self.game._deflatten``.
    """
    black_to_move = state[0, 0, 0, -1] == utils.BLACK
    color = utils.BLACK if black_to_move else utils.WHITE

    # NOTE(review): (0, 0) is presumably the same value as utils.PASS -- confirm.
    is_pass = action == self.game.size ** 2
    vertex = (0, 0) if is_pass else self.game._deflatten(action)
    return color, vertex
def _is_valid(self, history_boards, current_board, color, vertex): def _is_valid(self, history_boards, current_board, color, vertex):
### in board ### in board
if not self._in_board(vertex): if not self._in_board(vertex):
@ -97,11 +151,54 @@ class Go:
if self._is_suicide(current_board, color, vertex): if self._is_suicide(current_board, color, vertex):
return False return False
### forbid global isomorphous
if self._check_global_isomorphous(history_boards, current_board, color, vertex): if self._check_global_isomorphous(history_boards, current_board, color, vertex):
return False return False
return True return True
def simulate_is_valid(self, history_boards, current_board, state, action):
    """Check whether ``action`` is an acceptable move during simulation.

    Rebuilds ``self.simulate_latest_boards`` and ``self.simulate_board`` from
    the 8 + 8 stone planes of ``state``, then requires both ``_is_valid`` and
    ``_knowledge_prunning`` to accept the move.

    NOTE(review): the checks run against the ``history_boards`` and
    ``current_board`` arguments, not the boards rebuilt here -- confirm the
    caller passes the intended objects.
    """
    rebuilt = (
        (state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()
        for i in range(8)
    )
    self.simulate_latest_boards.clear()
    self.simulate_latest_boards.extend(rebuilt)
    self.simulate_board = copy.copy(self.simulate_latest_boards[-1])

    color, vertex = self._sa2cv(state, action)
    return (self._is_valid(history_boards, current_board, color, vertex)
            and self._knowledge_prunning(current_board, color, vertex))
def _do_move(self, color, vertex):
    """Place a stone of ``color`` at ``vertex`` on ``self.simulate_board``.

    Returns True for a pass or a successful placement, and False when the
    target point is not empty.

    NOTE(review): no captured stones are removed here -- confirm that is
    intended for simulation.
    """
    if vertex == utils.PASS:
        return True
    index = self.game._flatten(vertex)
    if self.simulate_board[index] != utils.EMPTY:
        return False
    self.simulate_board[index] = color
    return True
def simulate_step_forward(self, state, action):
    """Apply ``action`` to ``state`` and return ``(new_state, 0)``.

    The current board is recovered as plane 7 minus plane 15, the move is
    played on it via ``_do_move``, and the new state is assembled from
    planes 1..7, the new black plane, planes 9..15, the new white plane and
    the flipped last plane (``1 - state[..., -1]``).
    """
    size = self.game.size
    plane_shape = (1, size, size, 1)

    # Initialise the simulated board from the most recent planes of the state.
    self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist()
    color, vertex = self._sa2cv(state, action)
    self._do_move(color, vertex)

    board = np.array(self.simulate_board)
    black_plane = (board == utils.BLACK).reshape(*plane_shape)
    white_plane = (board == utils.WHITE).reshape(*plane_shape)
    next_player_plane = np.array(1 - state[:, :, :, -1]).reshape(*plane_shape)

    new_state = np.concatenate(
        [state[:, :, :, 1:8], black_plane,
         state[:, :, :, 9:16], white_plane,
         next_player_plane],
        axis=3)
    return new_state, 0
def executor_do_move(self, color, vertex): def executor_do_move(self, color, vertex):
if not self._is_valid(self.game.history, self.game.board, color, vertex): if not self._is_valid(self.game.history, self.game.board, color, vertex):
return False return False

View File

@ -79,7 +79,7 @@ while True:
prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1)) prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
print("Finished") print("Finished")
print("\n") print("\n")
score = game.executor.executor_get_score(True) score = game.game_engine.executor_get_score(True)
if score > 0: if score > 0:
winner = utils.BLACK winner = utils.BLACK
else: else:

View File

@ -1,199 +0,0 @@
import os, sys
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
import numpy as np
import utils
import time
import copy
import network_small
import tensorflow as tf
from collections import deque
from tianshou.core.mcts.mcts import MCTS
NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]]
CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]]


class GoEnv:
    """Go rules and state transitions used as the simulator for tree search.

    Boards are flat lists indexed through ``game._flatten``; vertices are
    1-based ``(x, y)`` tuples. The owning ``game`` object supplies ``size``,
    ``_flatten`` and ``_deflatten``.
    """

    def __init__(self, **kwargs):
        """Store the owning game (``kwargs['game']``) and create empty boards."""
        self.game = kwargs['game']
        self.simulate_board = [utils.EMPTY] * (self.game.size ** 2)
        self.simulate_latest_boards = deque(maxlen=8)

    def _in_board(self, vertex):
        """Return True when both coordinates lie in ``1..game.size``."""
        x, y = vertex
        if x < 1 or x > self.game.size:
            return False
        if y < 1 or y > self.game.size:
            return False
        return True

    def _neighbor(self, vertex):
        """Return the on-board orthogonal neighbours of ``vertex``."""
        x, y = vertex
        nei = []
        for d in NEIGHBOR_OFFSET:
            _x = x + d[0]
            _y = y + d[1]
            if self._in_board((_x, _y)):
                nei.append((_x, _y))
        return nei

    def _corner(self, vertex):
        """Return the on-board diagonal neighbours of ``vertex``."""
        x, y = vertex
        corner = []
        for d in CORNER_OFFSET:
            _x = x + d[0]
            _y = y + d[1]
            if self._in_board((_x, _y)):
                corner.append((_x, _y))
        return corner

    def _find_group(self, current_board, vertex):
        """Flood-fill the group containing ``vertex``.

        Returns ``(has_liberty, chain)``: whether any stone of the group
        touches an empty point, and the set of vertices in the group.
        """
        color = current_board[self.game._flatten(vertex)]
        chain = set()
        frontier = [vertex]
        has_liberty = False
        while frontier:
            current = frontier.pop()
            chain.add(current)
            for n in self._neighbor(current):
                value = current_board[self.game._flatten(n)]
                if value == color and n not in chain:
                    frontier.append(n)
                if value == utils.EMPTY:
                    has_liberty = True
        return has_liberty, chain

    def _is_suicide(self, current_board, color, vertex):
        """Return True when playing ``color`` at ``vertex`` would be suicide.

        The stone is placed on ``current_board`` temporarily. The board is
        restored in a ``finally`` clause, so the caller's board is not left
        modified even if a helper raises.
        """
        index = self.game._flatten(vertex)
        current_board[index] = color  # assume that we already take this move
        try:
            has_liberty, _ = self._find_group(current_board, vertex)
            suicide = not has_liberty  # no liberty, suicide
            if suicide:
                for n in self._neighbor(vertex):
                    if current_board[self.game._flatten(n)] == utils.another_color(color):
                        opponent_liberty, _ = self._find_group(current_board, n)
                        if not opponent_liberty:
                            # this move is able to take opponent's stone, not suicide
                            suicide = False
                            break
        finally:
            current_board[index] = utils.EMPTY  # undo this move
        return suicide

    def _process_board(self, current_board, color, vertex):
        """Remove opponent groups next to ``vertex`` that have no liberty."""
        for n in self._neighbor(vertex):
            if current_board[self.game._flatten(n)] == utils.another_color(color):
                has_liberty, group = self._find_group(current_board, n)
                if not has_liberty:
                    for b in group:
                        current_board[self.game._flatten(b)] = utils.EMPTY

    def _check_global_isomorphous(self, history_boards, current_board, color, vertex):
        """Return True when the move would recreate a board in ``history_boards``."""
        next_board = copy.copy(current_board)
        next_board[self.game._flatten(vertex)] = color
        self._process_board(next_board, color, vertex)
        return next_board in history_boards

    def _is_eye(self, current_board, color, vertex):
        """Decide whether ``vertex`` is treated as an eye of ``color``.

        False if any orthogonal neighbour is not ``color``. True if all
        neighbours are a proper subset of one group, or if fewer than half of
        the diagonal points hold ``-color``.
        """
        nei = self._neighbor(vertex)
        cor = self._corner(vertex)
        ncolor = {color == current_board[self.game._flatten(n)] for n in nei}
        if False in ncolor:
            # not all neighbors are in same color with us
            return False
        _, group = self._find_group(current_board, nei[0])
        if set(nei) < group:
            # all neighbors are in same group and same color with us
            return True
        opponent_number = [current_board[self.game._flatten(c)] for c in cor].count(-color)
        opponent_proportion = float(opponent_number) / float(len(cor))
        # few opponents -> real eye; many opponents -> fake eye
        return opponent_proportion < 0.5

    def _knowledge_prunning(self, current_board, color, vertex):
        """Return False when the move fills the player's own eye, True otherwise.

        Per the original author's note, this judgement assumes the position
        is an endgame.
        """
        if self._is_eye(current_board, color, vertex):
            return False
        return True

    def _sa2cv(self, state, action):
        """Translate ``(state, action)`` into ``(color, vertex)``.

        ``state`` is the play board with shape ``[1, size, size, 17]`` (per
        the original author's comment); ``action`` is a flat index, with
        ``size ** 2`` mapped to the vertex ``(0, 0)``.
        """
        if state[0, 0, 0, -1] == utils.BLACK:
            color = utils.BLACK
        else:
            color = utils.WHITE
        if action == self.game.size ** 2:
            # NOTE(review): presumably equal to utils.PASS -- confirm.
            vertex = (0, 0)
        else:
            vertex = self.game._deflatten(action)
        return color, vertex

    def _is_valid(self, history_boards, current_board, color, vertex):
        """Return True when the move passes every rule check below."""
        ### in board
        if not self._in_board(vertex):
            return False

        ### already have stone
        if not current_board[self.game._flatten(vertex)] == utils.EMPTY:
            return False

        ### check if it is suicide
        if self._is_suicide(current_board, color, vertex):
            return False

        ### forbid global isomorphous
        if self._check_global_isomorphous(history_boards, current_board, color, vertex):
            return False

        return True

    def simulate_is_valid(self, history_boards, current_board, state, action):
        """Check whether ``action`` is acceptable during simulation.

        NOTE(review): the boards rebuilt from ``state`` are stored on ``self``
        but the checks use the ``history_boards`` / ``current_board``
        arguments -- confirm the caller passes the intended objects.
        """
        # initialize simulate_latest_boards and simulate_board from state
        self.simulate_latest_boards.clear()
        for i in range(8):
            self.simulate_latest_boards.append(
                (state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist())
        self.simulate_board = copy.copy(self.simulate_latest_boards[-1])

        color, vertex = self._sa2cv(state, action)

        if not self._is_valid(history_boards, current_board, color, vertex):
            return False

        if not self._knowledge_prunning(current_board, color, vertex):
            return False

        return True

    def _do_move(self, color, vertex):
        """Place ``color`` at ``vertex`` on ``self.simulate_board``.

        Returns True for a pass or a successful placement, False when the
        point is occupied.
        NOTE(review): captured stones are not removed here -- confirm intent.
        """
        if vertex == utils.PASS:
            return True

        id_ = self.game._flatten(vertex)
        if self.simulate_board[id_] == utils.EMPTY:
            self.simulate_board[id_] = color
            return True
        return False

    def simulate_step_forward(self, state, action):
        """Apply ``action`` to ``state`` and return ``(new_state, 0)``.

        The new state keeps planes 1..7 and 9..15, appends fresh black and
        white planes built from the updated board, and flips the last plane.
        """
        size = self.game.size
        # initialize the simulate_board from state
        self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist()
        color, vertex = self._sa2cv(state, action)
        self._do_move(color, vertex)
        new_state = np.concatenate(
            [state[:, :, :, 1:8],
             (np.array(self.simulate_board) == utils.BLACK).reshape(1, size, size, 1),
             state[:, :, :, 9:16],
             (np.array(self.simulate_board) == utils.WHITE).reshape(1, size, size, 1),
             np.array(1 - state[:, :, :, -1]).reshape(1, size, size, 1)],
            axis=3)
        return new_state, 0