self play

rtz19970824 2017-12-08 17:05:33 +08:00
parent b687241a7d
commit 906ced84a3
8 changed files with 182 additions and 104 deletions

View File

@@ -190,7 +190,7 @@ class Game:
         self.executor = Executor(game=self)
         self.history = []
         self.past = deque(maxlen=8)
-        for i in range(8):
+        for _ in range(8):
             self.past.append(self.board)
 
     def _flatten(self, vertex):
@@ -205,6 +205,9 @@ class Game:
 
     def clear(self):
         self.board = [utils.EMPTY] * (self.size * self.size)
+        self.history = []
+        for _ in range(8):
+            self.past.append(self.board)
 
     def set_size(self, n):
         self.size = n
@@ -225,7 +228,7 @@ class Game:
 
     def gen_move(self, color):
         # move = self.strategy.gen_move(color)
         # return move
-        move = self.strategy.gen_move(self.past, color)
+        move, self.prob = self.strategy.gen_move(self.past, color)
        self.do_move(color, move)
         return move
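
With this change the strategy returns both the chosen move and the MCTS visit distribution, and Game keeps the latter on self.prob. A minimal usage sketch, not part of the commit, mirroring how the new self-play script below consumes it (it assumes Game() wires up its strategy exactly as that script relies on):

    import numpy as np
    import utils
    from game import Game

    game = Game()
    move = game.gen_move(utils.BLACK)                          # plays the move, returns a vertex or pass
    pi = np.array(game.prob).reshape(-1, game.size ** 2 + 1)   # visit distribution over all points plus pass
    print(move, pi.shape)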

View File

@@ -59,11 +59,12 @@ class Network(object):
         self.build_network()
 
     def build_network(self):
-        h = layers.conv2d(self.x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
+        h = layers.conv2d(self.x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
+                          normalizer_fn=layers.batch_norm,
                           normalizer_params={'is_training': self.is_training,
                                              'updates_collections': tf.GraphKeys.UPDATE_OPS},
                           weights_regularizer=layers.l2_regularizer(1e-4))
-        for i in range(19):
+        for i in range(4):
             h = residual_block(h, self.is_training)
         self.v = value_heads(h, self.is_training)
         self.p = policy_heads(h, self.is_training)
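
residual_block (like value_heads and policy_heads) is defined elsewhere in this file and is not part of the diff. For orientation only, here is a rough sketch of an AlphaGo Zero-style block built with the same contrib-layers calls; the repo's actual definition may differ:

    import tensorflow as tf
    from tensorflow.contrib import layers

    # Sketch, not the repo's code: two 3x3 conv + batch-norm layers with an
    # identity skip connection, 256 channels, matching the trunk conv above.
    def residual_block(h, is_training):
        residual = h
        h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                          normalizer_fn=layers.batch_norm,
                          normalizer_params={'is_training': is_training,
                                             'updates_collections': tf.GraphKeys.UPDATE_OPS},
                          weights_regularizer=layers.l2_regularizer(1e-4))
        h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=None,
                          normalizer_fn=layers.batch_norm,
                          normalizer_params={'is_training': is_training,
                                             'updates_collections': tf.GraphKeys.UPDATE_OPS},
                          weights_regularizer=layers.l2_regularizer(1e-4))
        return tf.nn.relu(h + residual)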

View File

@@ -1,40 +1,98 @@
 from game import Game
 from engine import GTPEngine
 import re
+import numpy as np
+from collections import deque
+import utils
+import argparse
 
-g = Game()
+parser = argparse.ArgumentParser()
+parser.add_argument('--result_path', type=str, default='./part1')
+args = parser.parse_args()
+
+game = Game()
+engine = GTPEngine(game_obj=game)
+history = deque(maxlen=8)
+for i in range(8):
+    history.append(game.board)
+
+state = []
+prob = []
+winner = []
 pattern = "[A-Z]{1}[0-9]{1}"
-g.show_board()
-e = GTPEngine(game_obj=g)
+game.show_board()
+
+
+def history2state(history, color):
+    state = np.zeros([1, game.size, game.size, 17])
+    for i in range(8):
+        state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(game.size ** 2)).reshape(game.size, game.size)
+        state[0, :, :, i + 8] = np.array(np.array(history[i]) == -np.ones(game.size ** 2)).reshape(game.size, game.size)
+    if color == utils.BLACK:
+        state[0, :, :, 16] = np.ones([game.size, game.size])
+    if color == utils.WHITE:
+        state[0, :, :, 16] = np.zeros([game.size, game.size])
+    return state
+
+
 num = 0
+game_num = 0
 black_pass = False
 white_pass = False
-while not (black_pass and white_pass):
-    if num % 2 == 0:
-        res = e.run_cmd(str(num) + " genmove BLACK")
-        num += 1
-        # print(res)
-        match = re.search(pattern, res)
-        if match is not None:
-            print(match.group())
-        else:
-            print("pass")
-        if re.search("pass", res) is not None:
-            black_pass = True
-        else:
-            black_pass = False
-    else:
-        res = e.run_cmd(str(num) + " genmove WHITE")
-        num += 1
-        match = re.search(pattern, res)
-        if match is not None:
-            print(match.group())
-        else:
-            print("pass")
-        if re.search("pass", res) is not None:
-            white_pass = True
-        else:
-            white_pass = False
-g.show_board()
+while True:
+    while not (black_pass and white_pass) and num < game.size ** 2 * 2:
+        if num % 2 == 0:
+            color = utils.BLACK
+            new_state = history2state(history, color)
+            state.append(new_state)
+            result = engine.run_cmd(str(num) + " genmove BLACK")
+            num += 1
+            match = re.search(pattern, result)
+            if match is not None:
+                print(match.group())
+            else:
+                print("pass")
+            if re.search("pass", result) is not None:
+                black_pass = True
+            else:
+                black_pass = False
+        else:
+            color = utils.WHITE
+            new_state = history2state(history, color)
+            state.append(new_state)
+            result = engine.run_cmd(str(num) + " genmove WHITE")
+            num += 1
+            match = re.search(pattern, result)
+            if match is not None:
+                print(match.group())
+            else:
+                print("pass")
+            if re.search("pass", result) is not None:
+                white_pass = True
+            else:
+                white_pass = False
+        game.show_board()
+        prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1))
+
+    print("Finished")
+    score = game.executor.get_score()
+    if score > 0:
+        winner = utils.BLACK
+    else:
+        winner = utils.WHITE
+    state = np.concatenate(state, axis=0)
+    prob = np.concatenate(prob, axis=0)
+    winner = np.ones([num, 1]) * winner
+    assert state.shape[0] == prob.shape[0]
+    assert state.shape[0] == winner.shape[0]
+    np.savez(args.result_path + "/game" + game_num, state=state, prob=prob, winner=winner)
+    state = []
+    prob = []
+    winner = []
+    num = 0
+    black_pass = False
+    white_pass = False
+    engine.run_cmd(str(num) + " clear_board")
+    history.clear()
+    for _ in range(8):
+        history.append(game.board)
+    game.show_board()
+    game_num += 1
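
Each saved file bundles one game's training triples. A minimal loading sketch, not part of the commit; it assumes the file name was meant to be built with str(game_num) (the script concatenates the integer directly, which would raise a TypeError) and that np.savez appends the .npz suffix:

    import numpy as np

    data = np.load('./part1/game0.npz')   # './part1' is the default --result_path
    states = data['state']    # [N, size, size, 17]: 8 black planes, 8 white planes, colour-to-move plane
    probs = data['prob']      # [N, size * size + 1]: MCTS visit distribution including pass
    winners = data['winner']  # [N, 1]: the numeric value of utils.BLACK or utils.WHITE
    print(states.shape, probs.shape, winners.shape)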

View File

@@ -198,28 +198,27 @@ class GoEnv:
         id_ = self._flatten(vertex)
         if self.board[id_] == utils.EMPTY:
             self.board[id_] = color
-            self.history.append(copy.copy(self.board))
             return True
         else:
             return False
 
     def step_forward(self, state, action):
         if state[0, 0, 0, -1] == 1:
-            color = 1
+            color = utils.BLACK
         else:
-            color = -1
-        if action == 81:
-            vertex = (0, 0)
+            color = utils.WHITE
+        if action == self.size ** 2:
+            vertex = utils.PASS
         else:
-            vertex = (action % 9 + 1, action / 9 + 1)
+            vertex = (action % self.size + 1, action / self.size + 1)
         # print(vertex)
         # print(self.board)
         self.board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist()
         self.do_move(color, vertex)
         new_state = np.concatenate(
-            [state[:, :, :, 1:8], (np.array(self.board) == 1).reshape(1, 9, 9, 1),
-             state[:, :, :, 9:16], (np.array(self.board) == -1).reshape(1, 9, 9, 1),
-             np.array(1 - state[:, :, :, -1]).reshape(1, 9, 9, 1)],
+            [state[:, :, :, 1:8], (np.array(self.board) == utils.BLACK).reshape(1, self.size, self.size, 1),
+             state[:, :, :, 9:16], (np.array(self.board) == utils.WHITE).reshape(1, self.size, self.size, 1),
+             np.array(1 - state[:, :, :, -1]).reshape(1, self.size, self.size, 1)],
            axis=3)
         return new_state, 0
@@ -233,26 +232,26 @@ class strategy(object):
                                      feed_dict={self.net.x: state, self.net.is_training: False})
 
     def data_process(self, history, color):
-        state = np.zeros([1, 9, 9, 17])
+        state = np.zeros([1, self.simulator.size, self.simulator.size, 17])
         for i in range(8):
-            state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(81)).reshape(9, 9)
-            state[0, :, :, i + 8] = np.array(np.array(history[i]) == -np.ones(81)).reshape(9, 9)
-        if color == 1:
-            state[0, :, :, 16] = np.ones([9, 9])
-        if color == -1:
-            state[0, :, :, 16] = np.zeros([9, 9])
+            state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.simulator.size ** 2)).reshape(self.simulator.size, self.simulator.size)
+            state[0, :, :, i + 8] = np.array(np.array(history[i]) == -np.ones(self.simulator.size ** 2)).reshape(self.simulator.size, self.simulator.size)
+        if color == utils.BLACK:
+            state[0, :, :, 16] = np.ones([self.simulator.size, self.simulator.size])
+        if color == utils.WHITE:
+            state[0, :, :, 16] = np.zeros([self.simulator.size, self.simulator.size])
         return state
 
     def gen_move(self, history, color):
         self.simulator.history = copy.copy(history)
         self.simulator.board = copy.copy(history[-1])
         state = self.data_process(self.simulator.history, color)
-        mcts = MCTS(self.simulator, self.evaluator, state, 82, inverse=True, max_step=10)
+        mcts = MCTS(self.simulator, self.evaluator, state, self.simulator.size ** 2 + 1, inverse=True, max_step=100)
         temp = 1
-        p = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
-        choice = np.random.choice(82, 1, p=p).tolist()[0]
-        if choice == 81:
-            move = (0, 0)
+        prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
+        choice = np.random.choice(self.simulator.size ** 2 + 1, 1, p=prob).tolist()[0]
+        if choice == self.simulator.size ** 2:
+            move = utils.PASS
         else:
-            move = (choice % 9 + 1, choice / 9 + 1)
-        return move
+            move = (choice % self.simulator.size + 1, choice / self.simulator.size + 1)
+        return move, prob
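
Both step_forward and gen_move share the same flat-action convention: indices 0 .. size*size-1 are board points, index size*size is pass. A standalone illustration, not repo code (action_to_vertex is a made-up name), of that mapping on the 9x9 board the old hard-coded constants assumed; note the repo's `/` relies on Python 2 integer division, written as `//` below:

    size = 9

    def action_to_vertex(action):
        if action == size ** 2:
            return (0, 0)                      # pass; the pre-change code wrote utils.PASS as (0, 0)
        return (action % size + 1, action // size + 1)

    assert action_to_vertex(0) == (1, 1)       # first board point
    assert action_to_vertex(10) == (2, 2)
    assert action_to_vertex(80) == (9, 9)      # last board point
    assert action_to_vertex(81) == (0, 0)      # pass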

14  AlphaGo/test.py  Normal file
View File

@@ -0,0 +1,14 @@
+import sys
+from game import Game
+from engine import GTPEngine
+# import utils
+
+game = Game()
+engine = GTPEngine(game_obj=game, name='tianshou')
+cmd = raw_input
+
+while not engine.disconnect:
+    command = cmd()
+    result = engine.run_cmd(command)
+    sys.stdout.write(result)
+    sys.stdout.flush()
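
test.py just pipes stdin commands to GTPEngine.run_cmd under a Python 2 interpreter (it uses raw_input). A hedged sketch of driving the engine programmatically with the same command strings the self-play script sends; the exact reply format depends on engine.py, which is not in this diff:

    from game import Game
    from engine import GTPEngine

    game = Game()
    engine = GTPEngine(game_obj=game, name='tianshou')
    print(engine.run_cmd("0 genmove BLACK"))   # reply contains a coordinate such as C3, or "pass"
    print(engine.run_cmd("1 genmove WHITE"))
    print(engine.run_cmd("2 clear_board"))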

1  bin/activate  Symbolic link
View File

@@ -0,0 +1 @@
+/home/tongzheng/anaconda2/bin/activate

1  bin/conda  Symbolic link
View File

@@ -0,0 +1 @@
+/home/tongzheng/anaconda2/bin/conda

1  bin/deactivate  Symbolic link
View File

@@ -0,0 +1 @@
+/home/tongzheng/anaconda2/bin/deactivate