connect reversi with game
This commit is contained in:
parent
d19559857f
commit
b2ef770415
@ -183,7 +183,7 @@ class GTPEngine():
|
|||||||
return 'unknown player', False
|
return 'unknown player', False
|
||||||
|
|
||||||
def cmd_get_score(self, args, **kwargs):
|
def cmd_get_score(self, args, **kwargs):
|
||||||
return self._game.game_engine.executor_get_score(self._game.board, True), True
|
return self._game.game_engine.executor_get_score(self._game.board), True
|
||||||
|
|
||||||
def cmd_show_board(self, args, **kwargs):
|
def cmd_show_board(self, args, **kwargs):
|
||||||
return self._game.board, True
|
return self._game.board, True
|
||||||
@ -194,4 +194,4 @@ class GTPEngine():
|
|||||||
|
|
||||||
if __name__ == "main":
|
if __name__ == "main":
|
||||||
game = Game()
|
game = Game()
|
||||||
engine = GTPEngine(game_obj=Game)
|
engine = GTPEngine(game_obj=game)
|
||||||
|
@ -10,12 +10,14 @@ import copy
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys, os
|
import sys, os
|
||||||
import go
|
|
||||||
import model
|
import model
|
||||||
from collections import deque
|
from collections import deque
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
|
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
|
||||||
from tianshou.core.mcts.mcts import MCTS
|
from tianshou.core.mcts.mcts import MCTS
|
||||||
|
|
||||||
|
import go
|
||||||
|
import reversi
|
||||||
|
|
||||||
class Game:
|
class Game:
|
||||||
'''
|
'''
|
||||||
Load the real game and trained weights.
|
Load the real game and trained weights.
|
||||||
@ -23,18 +25,26 @@ class Game:
|
|||||||
TODO : Maybe merge with the engine class in future,
|
TODO : Maybe merge with the engine class in future,
|
||||||
currently leave it untouched for interacting with Go UI.
|
currently leave it untouched for interacting with Go UI.
|
||||||
'''
|
'''
|
||||||
def __init__(self, size=9, komi=3.75, checkpoint_path=None):
|
def __init__(self, name="go", checkpoint_path=None):
|
||||||
self.size = size
|
self.name = name
|
||||||
self.komi = komi
|
if "go" == name:
|
||||||
|
self.size = 9
|
||||||
|
self.komi = 3.75
|
||||||
self.board = [utils.EMPTY] * (self.size ** 2)
|
self.board = [utils.EMPTY] * (self.size ** 2)
|
||||||
self.history = []
|
self.history = []
|
||||||
self.latest_boards = deque(maxlen=8)
|
self.latest_boards = deque(maxlen=8)
|
||||||
for _ in range(8):
|
for _ in range(8):
|
||||||
self.latest_boards.append(self.board)
|
self.latest_boards.append(self.board)
|
||||||
self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8, checkpoint_path=checkpoint_path)
|
|
||||||
# self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v],
|
self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8)
|
||||||
# feed_dict={self.net.x: state, self.net.is_training: False})
|
|
||||||
self.game_engine = go.Go(size=self.size, komi=self.komi)
|
self.game_engine = go.Go(size=self.size, komi=self.komi)
|
||||||
|
elif "reversi" == name:
|
||||||
|
self.size = 8
|
||||||
|
self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=1)
|
||||||
|
self.game_engine = reversi.Reversi()
|
||||||
|
self.board = self.game_engine.get_board()
|
||||||
|
else:
|
||||||
|
print(name + " is an unknown game...")
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
self.board = [utils.EMPTY] * (self.size ** 2)
|
self.board = [utils.EMPTY] * (self.size ** 2)
|
||||||
@ -65,7 +75,11 @@ class Game:
|
|||||||
# this function can be called directly to play the opponent's move
|
# this function can be called directly to play the opponent's move
|
||||||
if vertex == utils.PASS:
|
if vertex == utils.PASS:
|
||||||
return True
|
return True
|
||||||
|
# TODO this implementation is not very elegant
|
||||||
|
if "go" == self.name:
|
||||||
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
|
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
|
||||||
|
elif "revsersi" == self.name:
|
||||||
|
res = self.game_engine.executor_do_move(self.board, color, vertex)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def think_play_move(self, color):
|
def think_play_move(self, color):
|
||||||
@ -96,7 +110,7 @@ class Game:
|
|||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
g = Game(checkpoint_path='./checkpoints/')
|
g = Game()
|
||||||
g.show_board()
|
g.show_board()
|
||||||
g.think_play_move(1)
|
g.think_play_move(1)
|
||||||
#file = open("debug.txt", "a")
|
#file = open("debug.txt", "a")
|
||||||
|
@ -157,7 +157,7 @@ class Go:
|
|||||||
vertex = self._deflatten(action)
|
vertex = self._deflatten(action)
|
||||||
return vertex
|
return vertex
|
||||||
|
|
||||||
def _is_valid(self, history_boards, current_board, color, vertex):
|
def _rule_check(self, history_boards, current_board, color, vertex):
|
||||||
### in board
|
### in board
|
||||||
if not self._in_board(vertex):
|
if not self._in_board(vertex):
|
||||||
return False
|
return False
|
||||||
@ -176,30 +176,30 @@ class Go:
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def simulate_is_valid(self, state, action):
|
def _is_valid(self, state, action):
|
||||||
history_boards, color = state
|
history_boards, color = state
|
||||||
vertex = self._action2vertex(action)
|
vertex = self._action2vertex(action)
|
||||||
current_board = history_boards[-1]
|
current_board = history_boards[-1]
|
||||||
|
|
||||||
if not self._is_valid(history_boards, current_board, color, vertex):
|
if not self._rule_check(history_boards, current_board, color, vertex):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not self._knowledge_prunning(current_board, color, vertex):
|
if not self._knowledge_prunning(current_board, color, vertex):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def simulate_is_valid_list(self, state, action_set):
|
def simulate_get_mask(self, state, action_set):
|
||||||
# find all the invalid actions
|
# find all the invalid actions
|
||||||
invalid_action_list = []
|
invalid_action_mask = []
|
||||||
for action_candidate in action_set[:-1]:
|
for action_candidate in action_set[:-1]:
|
||||||
# go through all the actions excluding pass
|
# go through all the actions excluding pass
|
||||||
if not self.simulate_is_valid(state, action_candidate):
|
if not self._is_valid(state, action_candidate):
|
||||||
invalid_action_list.append(action_candidate)
|
invalid_action_mask.append(action_candidate)
|
||||||
if len(invalid_action_list) < len(action_set) - 1:
|
if len(invalid_action_mask) < len(action_set) - 1:
|
||||||
invalid_action_list.append(action_set[-1])
|
invalid_action_mask.append(action_set[-1])
|
||||||
# forbid pass, if we have other choices
|
# forbid pass, if we have other choices
|
||||||
# TODO: In fact we should not do this. In some extreme cases, we should permit pass.
|
# TODO: In fact we should not do this. In some extreme cases, we should permit pass.
|
||||||
return invalid_action_list
|
return invalid_action_mask
|
||||||
|
|
||||||
def _do_move(self, board, color, vertex):
|
def _do_move(self, board, color, vertex):
|
||||||
if vertex == utils.PASS:
|
if vertex == utils.PASS:
|
||||||
@ -219,7 +219,7 @@ class Go:
|
|||||||
return [history_boards, new_color], 0
|
return [history_boards, new_color], 0
|
||||||
|
|
||||||
def executor_do_move(self, history, latest_boards, current_board, color, vertex):
|
def executor_do_move(self, history, latest_boards, current_board, color, vertex):
|
||||||
if not self._is_valid(history, current_board, color, vertex):
|
if not self._rule_check(history, current_board, color, vertex):
|
||||||
return False
|
return False
|
||||||
current_board[self._flatten(vertex)] = color
|
current_board[self._flatten(vertex)] = color
|
||||||
self._process_board(current_board, color, vertex)
|
self._process_board(current_board, color, vertex)
|
||||||
@ -280,7 +280,7 @@ class Go:
|
|||||||
elif color_estimate < 0:
|
elif color_estimate < 0:
|
||||||
return utils.WHITE
|
return utils.WHITE
|
||||||
|
|
||||||
def executor_get_score(self, current_board, is_unknown_estimation=False):
|
def executor_get_score(self, current_board):
|
||||||
'''
|
'''
|
||||||
is_unknown_estimation: whether use nearby stone to predict the unknown
|
is_unknown_estimation: whether use nearby stone to predict the unknown
|
||||||
return score from BLACK perspective.
|
return score from BLACK perspective.
|
||||||
@ -294,10 +294,8 @@ class Go:
|
|||||||
_board[self._flatten(vertex)] = utils.BLACK
|
_board[self._flatten(vertex)] = utils.BLACK
|
||||||
elif boarder_color == {utils.WHITE}:
|
elif boarder_color == {utils.WHITE}:
|
||||||
_board[self._flatten(vertex)] = utils.WHITE
|
_board[self._flatten(vertex)] = utils.WHITE
|
||||||
elif is_unknown_estimation:
|
|
||||||
_board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex)
|
|
||||||
else:
|
else:
|
||||||
_board[self._flatten(vertex)] =utils.UNKNOWN
|
_board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex)
|
||||||
score = 0
|
score = 0
|
||||||
for i in _board:
|
for i in _board:
|
||||||
if i == utils.BLACK:
|
if i == utils.BLACK:
|
||||||
|
@ -7,7 +7,6 @@ import time
|
|||||||
import os
|
import os
|
||||||
import cPickle
|
import cPickle
|
||||||
|
|
||||||
|
|
||||||
class Data(object):
|
class Data(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.boards = []
|
self.boards = []
|
||||||
|
@ -25,7 +25,6 @@ def find_correct_moves(own, enemy):
|
|||||||
mobility |= search_offset_right(own, enemy, mask, 7) # Left bottom
|
mobility |= search_offset_right(own, enemy, mask, 7) # Left bottom
|
||||||
return mobility
|
return mobility
|
||||||
|
|
||||||
|
|
||||||
def calc_flip(pos, own, enemy):
|
def calc_flip(pos, own, enemy):
|
||||||
"""return flip stones of enemy by bitboard when I place stone at pos.
|
"""return flip stones of enemy by bitboard when I place stone at pos.
|
||||||
|
|
||||||
@ -133,7 +132,9 @@ class Reversi:
|
|||||||
self.board = self.bitboard2board()
|
self.board = self.bitboard2board()
|
||||||
return self.board
|
return self.board
|
||||||
|
|
||||||
def simulate_is_valid(self, board, color):
|
def simulate_get_mask(self, state, action_set):
|
||||||
|
history_boards, color = state
|
||||||
|
board = history_boards[-1]
|
||||||
self.board = board
|
self.board = board
|
||||||
self.color = color
|
self.color = color
|
||||||
self.board2bitboard()
|
self.board2bitboard()
|
||||||
@ -142,13 +143,18 @@ class Reversi:
|
|||||||
valid_moves = bit_to_array(mobility, 64)
|
valid_moves = bit_to_array(mobility, 64)
|
||||||
valid_moves = np.argwhere(valid_moves)
|
valid_moves = np.argwhere(valid_moves)
|
||||||
valid_moves = list(np.reshape(valid_moves, len(valid_moves)))
|
valid_moves = list(np.reshape(valid_moves, len(valid_moves)))
|
||||||
return valid_moves
|
# TODO it seems that the pass move is not considered
|
||||||
|
invalid_action_mask = []
|
||||||
|
for action in action_set:
|
||||||
|
if action not in valid_moves:
|
||||||
|
invalid_action_mask.append(action)
|
||||||
|
return invalid_action_mask
|
||||||
|
|
||||||
def simulate_step_forward(self, state, vertex):
|
def simulate_step_forward(self, state, action):
|
||||||
self.board = state[0]
|
self.board = state[0]
|
||||||
self.color = state[1]
|
self.color = state[1]
|
||||||
self.board2bitboard()
|
self.board2bitboard()
|
||||||
self.vertex2action(vertex)
|
self.action = action
|
||||||
step_forward = self.step()
|
step_forward = self.step()
|
||||||
if step_forward:
|
if step_forward:
|
||||||
new_board = self.bitboard2board()
|
new_board = self.bitboard2board()
|
||||||
|
@ -79,7 +79,7 @@ while True:
|
|||||||
prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
|
prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
|
||||||
print("Finished")
|
print("Finished")
|
||||||
print("\n")
|
print("\n")
|
||||||
score = game.game_engine.executor_get_score(game.board, True)
|
score = game.game_engine.executor_get_score(game.board)
|
||||||
if score > 0:
|
if score > 0:
|
||||||
winner = utils.BLACK
|
winner = utils.BLACK
|
||||||
else:
|
else:
|
||||||
|
@ -73,7 +73,7 @@ class UCTNode(MCTSNode):
|
|||||||
def valid_mask(self, simulator):
|
def valid_mask(self, simulator):
|
||||||
# let all invalid actions be illeagel in mcts
|
# let all invalid actions be illeagel in mcts
|
||||||
if self.mask is None:
|
if self.mask is None:
|
||||||
self.mask = simulator.simulate_is_valid_list(self.state, range(self.action_num))
|
self.mask = simulator.simulate_get_mask(self.state, range(self.action_num))
|
||||||
self.ucb[self.mask] = -float("Inf")
|
self.ucb[self.mask] = -float("Inf")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user