debug for go and reversi

This commit is contained in:
rtz19970824 2017-12-24 14:40:50 +08:00
parent 426251e158
commit 74504ceb1d
7 changed files with 111 additions and 109 deletions

View File

@ -6,6 +6,8 @@
# #
from game import Game from game import Game
import copy
import numpy as np
import utils import utils
@ -186,7 +188,10 @@ class GTPEngine():
return self._game.game_engine.executor_get_score(self._game.board), True return self._game.game_engine.executor_get_score(self._game.board), True
def cmd_show_board(self, args, **kwargs): def cmd_show_board(self, args, **kwargs):
return self._game.board, True board = copy.deepcopy(self._game.board)
if isinstance(board, np.ndarray):
board = board.flatten().tolist()
return board, True
def cmd_get_prob(self, args, **kwargs): def cmd_get_prob(self, args, **kwargs):
return self._game.prob, True return self._game.prob, True

View File

@ -26,33 +26,37 @@ class Game:
TODO : Maybe merge with the engine class in future, TODO : Maybe merge with the engine class in future,
currently leave it untouched for interacting with Go UI. currently leave it untouched for interacting with Go UI.
''' '''
def __init__(self, name="go", role="unknown", debug=False, checkpoint_path=None): def __init__(self, name="reversi", role="unknown", debug=False, checkpoint_path=None):
self.name = name self.name = name
self.role = role self.role = role
self.debug = debug self.debug = debug
if self.name == "go": if self.name == "go":
self.size = 9 self.size = 9
self.komi = 3.75 self.komi = 3.75
self.board = [utils.EMPTY] * (self.size ** 2)
self.history = [] self.history = []
self.history_length = 8 self.history_length = 8
self.latest_boards = deque(maxlen=8)
for _ in range(8):
self.latest_boards.append(self.board)
self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role) self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role)
self.board = [utils.EMPTY] * (self.size ** 2)
elif self.name == "reversi": elif self.name == "reversi":
self.size = 8 self.size = 8
self.history_length = 1 self.history_length = 1
self.game_engine = reversi.Reversi() self.history = []
self.game_engine = reversi.Reversi(size=self.size)
self.board = self.game_engine.get_board() self.board = self.game_engine.get_board()
else: else:
raise ValueError(name + " is an unknown game...") raise ValueError(name + " is an unknown game...")
self.evaluator = model.ResNet(self.size, self.size ** 2 + 1, history_length=self.history_length) self.evaluator = model.ResNet(self.size, self.size ** 2 + 1, history_length=self.history_length)
self.latest_boards = deque(maxlen=self.history_length)
for _ in range(self.history_length):
self.latest_boards.append(self.board)
def clear(self): def clear(self):
if self.name == "go":
self.board = [utils.EMPTY] * (self.size ** 2) self.board = [utils.EMPTY] * (self.size ** 2)
self.history = [] self.history = []
if self.name == "reversi":
self.board = self.game_engine.get_board()
for _ in range(self.history_length): for _ in range(self.history_length):
self.latest_boards.append(self.board) self.latest_boards.append(self.board)
@ -84,7 +88,7 @@ class Game:
if self.name == "go": if self.name == "go":
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex) res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
elif self.name == "reversi": elif self.name == "reversi":
res = self.game_engine.executor_do_move(self.board, color, vertex) res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
return res return res
def think_play_move(self, color): def think_play_move(self, color):
@ -110,13 +114,14 @@ class Game:
if row[i] < 10: if row[i] < 10:
print(' ', end='') print(' ', end='')
for j in range(self.size): for j in range(self.size):
print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ') print(self.status2symbol(self.board[self.game_engine._flatten((j + 1, i + 1))]), end=' ')
print('') print('')
sys.stdout.flush() sys.stdout.flush()
if __name__ == "__main__": if __name__ == "__main__":
g = Game() g = Game("go")
g.show_board() print(g.board)
g.clear()
g.think_play_move(1) g.think_play_move(1)
#file = open("debug.txt", "a") #file = open("debug.txt", "a")
#file.write("mcts check\n") #file.write("mcts check\n")

View File

@ -212,12 +212,12 @@ class Go:
def simulate_step_forward(self, state, action): def simulate_step_forward(self, state, action):
# initialize the simulate_board from state # initialize the simulate_board from state
history_boards, color = state history_boards, color = copy.deepcopy(state)
if history_boards[-1] == history_boards[-2] and action is utils.PASS: if history_boards[-1] == history_boards[-2] and action is utils.PASS:
return None, 2 * (float(self.executor_get_score(history_boards[-1]) > 0)-0.5) * color return None, 2 * (float(self.executor_get_score(history_boards[-1]) > 0)-0.5) * color
else: else:
vertex = self._action2vertex(action) vertex = self._action2vertex(action)
new_board = self._do_move(copy.copy(history_boards[-1]), color, vertex) new_board = self._do_move(copy.deepcopy(history_boards[-1]), color, vertex)
history_boards.append(new_board) history_boards.append(new_board)
new_color = -color new_color = -color
return [history_boards, new_color], 0 return [history_boards, new_color], 0
@ -227,8 +227,8 @@ class Go:
return False return False
current_board[self._flatten(vertex)] = color current_board[self._flatten(vertex)] = color
self._process_board(current_board, color, vertex) self._process_board(current_board, color, vertex)
history.append(copy.copy(current_board)) history.append(copy.deepcopy(current_board))
latest_boards.append(copy.copy(current_board)) latest_boards.append(copy.deepcopy(current_board))
return True return True
def _find_empty(self, current_board): def _find_empty(self, current_board):

View File

@ -173,10 +173,10 @@ class ResNet(object):
""" """
state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1]) state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1])
for i in range(self.history_length): for i in range(self.history_length):
state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.board_size ** 2)).reshape(self.board_size, state[0, :, :, i] = np.array(np.array(history[i]).flatten() == np.ones(self.board_size ** 2)).reshape(self.board_size,
self.board_size) self.board_size)
state[0, :, :, i + self.history_length] = np.array( state[0, :, :, i + self.history_length] = np.array(
np.array(history[i]) == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size) np.array(history[i]).flatten() == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
# TODO: need a config to specify the BLACK and WHITE # TODO: need a config to specify the BLACK and WHITE
if color == +1: if color == +1:
state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size]) state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size])
@ -301,7 +301,7 @@ class ResNet(object):
:return: :return:
""" """
new_board = copy.copy(board) new_board = copy.deepcopy(board)
if new_board.ndim == 3: if new_board.ndim == 3:
new_board = np.expand_dims(new_board, axis=0) new_board = np.expand_dims(new_board, axis=0)
@ -331,7 +331,7 @@ class ResNet(object):
:param orientation: an integer, which orientation to reflect :param orientation: an integer, which orientation to reflect
:return: :return:
""" """
new_board = copy.copy(board) new_board = copy.deepcopy(board)
for _ in range(times): for _ in range(times):
if orientation == 0: if orientation == 0:
new_board = new_board[:, ::-1] new_board = new_board[:, ::-1]

View File

@ -89,7 +89,7 @@ if __name__ == '__main__':
pattern = "[A-Z]{1}[0-9]{1}" pattern = "[A-Z]{1}[0-9]{1}"
space = re.compile("\s+") space = re.compile("\s+")
size = 9 size = {"go":9, "reversi":8}
show = ['.', 'X', 'O'] show = ['.', 'X', 'O']
evaluate_rounds = 1 evaluate_rounds = 1
@ -102,13 +102,13 @@ if __name__ == '__main__':
pass_flag = [False, False] pass_flag = [False, False]
print("Start game {}".format(game_num)) print("Start game {}".format(game_num))
# end the game if both players chose to pass, or played too many turns # end the game if both players chose to pass, or played too many turns
while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2: while not (pass_flag[0] and pass_flag[1]) and num < size["reversi"] ** 2 * 2:
turn = num % 2 turn = num % 2
board = player[turn].run_cmd(str(num) + ' show_board') board = player[turn].run_cmd(str(num) + ' show_board')
board = eval(board[board.index('['):board.index(']') + 1]) board = eval(board[board.index('['):board.index(']') + 1])
for i in range(size): for i in range(size["reversi"]):
for j in range(size): for j in range(size["reversi"]):
print show[board[i * size + j]] + " ", print show[board[i * size["reversi"] + j]] + " ",
print "\n", print "\n",
data.boards.append(board) data.boards.append(board)
start_time = time.time() start_time = time.time()

View File

@ -1,4 +1,5 @@
import numpy as np import numpy as np
import copy
''' '''
Settings of the Reversi game. Settings of the Reversi game.
@ -8,13 +9,8 @@ Settings of the Reversi game.
class Reversi: class Reversi:
def __init__(self, black=None, white=None): def __init__(self, **kwargs):
self.board = None # 8 * 8 board with 1 for black, -1 for white and 0 for blank self.size = kwargs['size']
self.color = None # 1 for black and -1 for white
self.action = None # number in 0~63
self.winner = None
self.black_win = None
self.size = 8
def _deflatten(self, idx): def _deflatten(self, idx):
x = idx // self.size + 1 x = idx // self.size + 1
@ -24,39 +20,39 @@ class Reversi:
def _flatten(self, vertex): def _flatten(self, vertex):
x, y = vertex x, y = vertex
if (x == 0) and (y == 0): if (x == 0) and (y == 0):
return 64 return self.size ** 2
return (x - 1) * self.size + (y - 1) return (x - 1) * self.size + (y - 1)
def get_board(self, board=None): def get_board(self):
self.board = board or np.zeros([8,8]) board = np.zeros([self.size, self.size], dtype=np.int32)
self.board[3, 3] = -1 board[self.size / 2 - 1, self.size / 2 - 1] = -1
self.board[4, 4] = -1 board[self.size / 2, self.size / 2] = -1
self.board[3, 4] = 1 board[self.size / 2 - 1, self.size / 2] = 1
self.board[4, 3] = 1 board[self.size / 2, self.size / 2 - 1] = 1
return self.board return board
def _find_correct_moves(self, is_next=False): def _find_correct_moves(self, board, color, is_next=False):
moves = [] moves = []
if is_next: if is_next:
color = 0 - self.color new_color = 0 - color
else: else:
color = self.color new_color = color
for i in range(64): for i in range(self.size ** 2):
x, y = self._deflatten(i) x, y = self._deflatten(i)
valid = self._is_valid(x - 1, y - 1, color) valid = self._is_valid(board, x - 1, y - 1, new_color)
if valid: if valid:
moves.append(i) moves.append(i)
return moves return moves
def _one_direction_valid(self, x, y, color): def _one_direction_valid(self, board, x, y, color):
if (x >= 0) and (x < self.size): if (x >= 0) and (x < self.size):
if (y >= 0) and (y < self.size): if (y >= 0) and (y < self.size):
if self.board[x, y] == color: if board[x, y] == color:
return True return True
return False return False
def _is_valid(self, x, y, color): def _is_valid(self, board, x, y, color):
if self.board[x, y]: if board[x, y]:
return False return False
for x_direction in [-1, 0, 1]: for x_direction in [-1, 0, 1]:
for y_direction in [-1, 0, 1]: for y_direction in [-1, 0, 1]:
@ -66,20 +62,18 @@ class Reversi:
while True: while True:
new_x += x_direction new_x += x_direction
new_y += y_direction new_y += y_direction
if self._one_direction_valid(new_x, new_y, 0 - color): if self._one_direction_valid(board, new_x, new_y, 0 - color):
flag = 1 flag = 1
else: else:
break break
if self._one_direction_valid(new_x, new_y, color) and flag: if self._one_direction_valid(board, new_x, new_y, color) and flag:
return True return True
return False return False
def simulate_get_mask(self, state, action_set): def simulate_get_mask(self, state, action_set):
history_boards, color = state history_boards, color = copy.deepcopy(state)
self.board = np.reshape(history_boards[-1], (self.size, self.size)) board = copy.deepcopy(history_boards[-1])
self.color = color valid_moves = self._find_correct_moves(board, color)
valid_moves = self._find_correct_moves()
print(valid_moves)
if not len(valid_moves): if not len(valid_moves):
invalid_action_mask = action_set[0:-1] invalid_action_mask = action_set[0:-1]
else: else:
@ -90,34 +84,34 @@ class Reversi:
return invalid_action_mask return invalid_action_mask
def simulate_step_forward(self, state, action): def simulate_step_forward(self, state, action):
self.board = state[0].copy() history_boards, color = copy.deepcopy(state)
self.board = np.reshape(self.board, (self.size, self.size)) board = copy.deepcopy(history_boards[-1])
self.color = state[1] if action == self.size ** 2:
self.action = action valid_moves = self._find_correct_moves(board, color, is_next=True)
if self.action == 64:
valid_moves = self._find_correct_moves(is_next=True)
if not len(valid_moves): if not len(valid_moves):
self._game_over() winner = self._get_winner(board)
return None, self.winner * self.color return None, winner * color
else: else:
return [self.board, 0 - self.color], 0 return [history_boards, 0 - color], 0
self._step() new_board = self._step(board, color, action)
return [self.board, 0 - self.color], 0 history_boards.append(new_board)
return [history_boards, 0 - color], 0
def _game_over(self): def _get_winner(self, board):
black_num, white_num = self._number_of_black_and_white() black_num, white_num = self._number_of_black_and_white(board)
self.black_win = black_num - white_num black_win = black_num - white_num
if self.black_win > 0: if black_win > 0:
self.winner = 1 winner = 1
elif self.black_win < 0: elif black_win < 0:
self.winner = -1 winner = -1
else: else:
self.winner = 0 winner = 0
return winner
def _number_of_black_and_white(self): def _number_of_black_and_white(self, board):
black_num = 0 black_num = 0
white_num = 0 white_num = 0
board_list = np.reshape(self.board, self.size ** 2) board_list = np.reshape(board, self.size ** 2)
for i in range(len(board_list)): for i in range(len(board_list)):
if board_list[i] == 1: if board_list[i] == 1:
black_num += 1 black_num += 1
@ -125,19 +119,18 @@ class Reversi:
white_num += 1 white_num += 1
return black_num, white_num return black_num, white_num
def _step(self): def _step(self, board, color, action):
if self.action < 0 or self.action > 63: if action < 0 or action > self.size ** 2 - 1:
raise ValueError("Action not in the range of [0,63]!") raise ValueError("Action not in the range of [0,63]!")
if self.action is None: if action is None:
raise ValueError("Action is None!") raise ValueError("Action is None!")
x, y = self._deflatten(self.action) x, y = self._deflatten(action)
valid = self._flip(x -1, y - 1) new_board = self._flip(board, x - 1, y - 1, color)
if not valid: return new_board
raise ValueError("Illegal action!")
def _flip(self, x, y): def _flip(self, board, x, y, color):
valid = 0 valid = 0
self.board[x, y] = self.color board[x, y] = color
for x_direction in [-1, 0, 1]: for x_direction in [-1, 0, 1]:
for y_direction in [-1, 0, 1]: for y_direction in [-1, 0, 1]:
new_x = x new_x = x
@ -146,47 +139,46 @@ class Reversi:
while True: while True:
new_x += x_direction new_x += x_direction
new_y += y_direction new_y += y_direction
if self._one_direction_valid(new_x, new_y, 0 - self.color): if self._one_direction_valid(board, new_x, new_y, 0 - color):
flag = 1 flag = 1
else: else:
break break
if self._one_direction_valid(new_x, new_y, self.color) and flag: if self._one_direction_valid(board, new_x, new_y, color) and flag:
valid = 1 valid = 1
flip_x = x flip_x = x
flip_y = y flip_y = y
while True: while True:
flip_x += x_direction flip_x += x_direction
flip_y += y_direction flip_y += y_direction
if self._one_direction_valid(flip_x, flip_y, 0 - self.color): if self._one_direction_valid(board, flip_x, flip_y, 0 - color):
self.board[flip_x, flip_y] = self.color board[flip_x, flip_y] = color
else: else:
break break
if valid: if valid:
return True return board
else: else:
return False raise ValueError("Invalid action")
def executor_do_move(self, history, latest_boards, board, color, vertex): def executor_do_move(self, history, latest_boards, board, color, vertex):
self.board = np.reshape(board, (self.size, self.size)) board = np.reshape(board, (self.size, self.size))
self.color = color color = color
self.action = self._flatten(vertex) action = self._flatten(vertex)
if self.action == 64: if action == self.size ** 2:
valid_moves = self._find_correct_moves(is_next=True) valid_moves = self._find_correct_moves(board, color, is_next=True)
if not len(valid_moves): if not len(valid_moves):
return False return False
else: else:
return True return True
else: else:
self._step() new_board = self._step(board, color, action)
history.append(new_board)
latest_boards.append(new_board)
return True return True
def executor_get_score(self, board): def executor_get_score(self, board):
self.board = board board = board
self._game_over() winner = self._get_winner(board)
if self.black_win is not None: return winner
return self.black_win
else:
raise ValueError("Game not finished!")
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -110,15 +110,15 @@ class ActionNode(object):
self.reward = 0 self.reward = 0
def type_conversion_to_tuple(self): def type_conversion_to_tuple(self):
if type(self.next_state) is np.ndarray: if isinstance(self.next_state, np.ndarray):
self.next_state = self.next_state.tolist() self.next_state = self.next_state.tolist()
if type(self.next_state) is list: if isinstance(self.next_state, list):
self.next_state = list2tuple(self.next_state) self.next_state = list2tuple(self.next_state)
def type_conversion_to_origin(self): def type_conversion_to_origin(self):
if self.state_type is np.ndarray: if isinstance(self.state_type, np.ndarray):
self.next_state = np.array(self.next_state) self.next_state = np.array(self.next_state)
if self.state_type is list: if isinstance(self.state_type, np.ndarray):
self.next_state = tuple2list(self.next_state) self.next_state = tuple2list(self.next_state)
def selection(self, simulator): def selection(self, simulator):