connect reversi with game

This commit is contained in:
Dong Yan 2017-12-23 13:05:25 +08:00
parent d19559857f
commit b2ef770415
7 changed files with 57 additions and 40 deletions

View File

@ -183,7 +183,7 @@ class GTPEngine():
return 'unknown player', False return 'unknown player', False
def cmd_get_score(self, args, **kwargs): def cmd_get_score(self, args, **kwargs):
return self._game.game_engine.executor_get_score(self._game.board, True), True return self._game.game_engine.executor_get_score(self._game.board), True
def cmd_show_board(self, args, **kwargs): def cmd_show_board(self, args, **kwargs):
return self._game.board, True return self._game.board, True
@ -194,4 +194,4 @@ class GTPEngine():
if __name__ == "main": if __name__ == "main":
game = Game() game = Game()
engine = GTPEngine(game_obj=Game) engine = GTPEngine(game_obj=game)

View File

@ -10,12 +10,14 @@ import copy
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import sys, os import sys, os
import go
import model import model
from collections import deque from collections import deque
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir)) sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
from tianshou.core.mcts.mcts import MCTS from tianshou.core.mcts.mcts import MCTS
import go
import reversi
class Game: class Game:
''' '''
Load the real game and trained weights. Load the real game and trained weights.
@ -23,18 +25,26 @@ class Game:
TODO : Maybe merge with the engine class in future, TODO : Maybe merge with the engine class in future,
currently leave it untouched for interacting with Go UI. currently leave it untouched for interacting with Go UI.
''' '''
def __init__(self, size=9, komi=3.75, checkpoint_path=None): def __init__(self, name="go", checkpoint_path=None):
self.size = size self.name = name
self.komi = komi if "go" == name:
self.size = 9
self.komi = 3.75
self.board = [utils.EMPTY] * (self.size ** 2) self.board = [utils.EMPTY] * (self.size ** 2)
self.history = [] self.history = []
self.latest_boards = deque(maxlen=8) self.latest_boards = deque(maxlen=8)
for _ in range(8): for _ in range(8):
self.latest_boards.append(self.board) self.latest_boards.append(self.board)
self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8, checkpoint_path=checkpoint_path)
# self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8)
# feed_dict={self.net.x: state, self.net.is_training: False})
self.game_engine = go.Go(size=self.size, komi=self.komi) self.game_engine = go.Go(size=self.size, komi=self.komi)
elif "reversi" == name:
self.size = 8
self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=1)
self.game_engine = reversi.Reversi()
self.board = self.game_engine.get_board()
else:
print(name + " is an unknown game...")
def clear(self): def clear(self):
self.board = [utils.EMPTY] * (self.size ** 2) self.board = [utils.EMPTY] * (self.size ** 2)
@ -65,7 +75,11 @@ class Game:
# this function can be called directly to play the opponent's move # this function can be called directly to play the opponent's move
if vertex == utils.PASS: if vertex == utils.PASS:
return True return True
# TODO this implementation is not very elegant
if "go" == self.name:
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex) res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
elif "revsersi" == self.name:
res = self.game_engine.executor_do_move(self.board, color, vertex)
return res return res
def think_play_move(self, color): def think_play_move(self, color):
@ -96,7 +110,7 @@ class Game:
sys.stdout.flush() sys.stdout.flush()
if __name__ == "__main__": if __name__ == "__main__":
g = Game(checkpoint_path='./checkpoints/') g = Game()
g.show_board() g.show_board()
g.think_play_move(1) g.think_play_move(1)
#file = open("debug.txt", "a") #file = open("debug.txt", "a")

View File

@ -157,7 +157,7 @@ class Go:
vertex = self._deflatten(action) vertex = self._deflatten(action)
return vertex return vertex
def _is_valid(self, history_boards, current_board, color, vertex): def _rule_check(self, history_boards, current_board, color, vertex):
### in board ### in board
if not self._in_board(vertex): if not self._in_board(vertex):
return False return False
@ -176,30 +176,30 @@ class Go:
return True return True
def simulate_is_valid(self, state, action): def _is_valid(self, state, action):
history_boards, color = state history_boards, color = state
vertex = self._action2vertex(action) vertex = self._action2vertex(action)
current_board = history_boards[-1] current_board = history_boards[-1]
if not self._is_valid(history_boards, current_board, color, vertex): if not self._rule_check(history_boards, current_board, color, vertex):
return False return False
if not self._knowledge_prunning(current_board, color, vertex): if not self._knowledge_prunning(current_board, color, vertex):
return False return False
return True return True
def simulate_is_valid_list(self, state, action_set): def simulate_get_mask(self, state, action_set):
# find all the invalid actions # find all the invalid actions
invalid_action_list = [] invalid_action_mask = []
for action_candidate in action_set[:-1]: for action_candidate in action_set[:-1]:
# go through all the actions excluding pass # go through all the actions excluding pass
if not self.simulate_is_valid(state, action_candidate): if not self._is_valid(state, action_candidate):
invalid_action_list.append(action_candidate) invalid_action_mask.append(action_candidate)
if len(invalid_action_list) < len(action_set) - 1: if len(invalid_action_mask) < len(action_set) - 1:
invalid_action_list.append(action_set[-1]) invalid_action_mask.append(action_set[-1])
# forbid pass, if we have other choices # forbid pass, if we have other choices
# TODO: In fact we should not do this. In some extreme cases, we should permit pass. # TODO: In fact we should not do this. In some extreme cases, we should permit pass.
return invalid_action_list return invalid_action_mask
def _do_move(self, board, color, vertex): def _do_move(self, board, color, vertex):
if vertex == utils.PASS: if vertex == utils.PASS:
@ -219,7 +219,7 @@ class Go:
return [history_boards, new_color], 0 return [history_boards, new_color], 0
def executor_do_move(self, history, latest_boards, current_board, color, vertex): def executor_do_move(self, history, latest_boards, current_board, color, vertex):
if not self._is_valid(history, current_board, color, vertex): if not self._rule_check(history, current_board, color, vertex):
return False return False
current_board[self._flatten(vertex)] = color current_board[self._flatten(vertex)] = color
self._process_board(current_board, color, vertex) self._process_board(current_board, color, vertex)
@ -280,7 +280,7 @@ class Go:
elif color_estimate < 0: elif color_estimate < 0:
return utils.WHITE return utils.WHITE
def executor_get_score(self, current_board, is_unknown_estimation=False): def executor_get_score(self, current_board):
''' '''
is_unknown_estimation: whether use nearby stone to predict the unknown is_unknown_estimation: whether use nearby stone to predict the unknown
return score from BLACK perspective. return score from BLACK perspective.
@ -294,10 +294,8 @@ class Go:
_board[self._flatten(vertex)] = utils.BLACK _board[self._flatten(vertex)] = utils.BLACK
elif boarder_color == {utils.WHITE}: elif boarder_color == {utils.WHITE}:
_board[self._flatten(vertex)] = utils.WHITE _board[self._flatten(vertex)] = utils.WHITE
elif is_unknown_estimation:
_board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex)
else: else:
_board[self._flatten(vertex)] =utils.UNKNOWN _board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex)
score = 0 score = 0
for i in _board: for i in _board:
if i == utils.BLACK: if i == utils.BLACK:

View File

@ -7,7 +7,6 @@ import time
import os import os
import cPickle import cPickle
class Data(object): class Data(object):
def __init__(self): def __init__(self):
self.boards = [] self.boards = []

View File

@ -25,7 +25,6 @@ def find_correct_moves(own, enemy):
mobility |= search_offset_right(own, enemy, mask, 7) # Left bottom mobility |= search_offset_right(own, enemy, mask, 7) # Left bottom
return mobility return mobility
def calc_flip(pos, own, enemy): def calc_flip(pos, own, enemy):
"""return flip stones of enemy by bitboard when I place stone at pos. """return flip stones of enemy by bitboard when I place stone at pos.
@ -133,7 +132,9 @@ class Reversi:
self.board = self.bitboard2board() self.board = self.bitboard2board()
return self.board return self.board
def simulate_is_valid(self, board, color): def simulate_get_mask(self, state, action_set):
history_boards, color = state
board = history_boards[-1]
self.board = board self.board = board
self.color = color self.color = color
self.board2bitboard() self.board2bitboard()
@ -142,13 +143,18 @@ class Reversi:
valid_moves = bit_to_array(mobility, 64) valid_moves = bit_to_array(mobility, 64)
valid_moves = np.argwhere(valid_moves) valid_moves = np.argwhere(valid_moves)
valid_moves = list(np.reshape(valid_moves, len(valid_moves))) valid_moves = list(np.reshape(valid_moves, len(valid_moves)))
return valid_moves # TODO it seems that the pass move is not considered
invalid_action_mask = []
for action in action_set:
if action not in valid_moves:
invalid_action_mask.append(action)
return invalid_action_mask
def simulate_step_forward(self, state, vertex): def simulate_step_forward(self, state, action):
self.board = state[0] self.board = state[0]
self.color = state[1] self.color = state[1]
self.board2bitboard() self.board2bitboard()
self.vertex2action(vertex) self.action = action
step_forward = self.step() step_forward = self.step()
if step_forward: if step_forward:
new_board = self.bitboard2board() new_board = self.bitboard2board()

View File

@ -79,7 +79,7 @@ while True:
prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1)) prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
print("Finished") print("Finished")
print("\n") print("\n")
score = game.game_engine.executor_get_score(game.board, True) score = game.game_engine.executor_get_score(game.board)
if score > 0: if score > 0:
winner = utils.BLACK winner = utils.BLACK
else: else:

View File

@ -73,7 +73,7 @@ class UCTNode(MCTSNode):
def valid_mask(self, simulator): def valid_mask(self, simulator):
# let all invalid actions be illeagel in mcts # let all invalid actions be illeagel in mcts
if self.mask is None: if self.mask is None:
self.mask = simulator.simulate_is_valid_list(self.state, range(self.action_num)) self.mask = simulator.simulate_get_mask(self.state, range(self.action_num))
self.ucb[self.mask] = -float("Inf") self.ucb[self.mask] = -float("Inf")