From 13738f13c4036bb18655d7bef68bc5d5e355fa24 Mon Sep 17 00:00:00 2001 From: rtz19970824 Date: Tue, 28 Nov 2017 17:00:10 +0800 Subject: [PATCH] merge gtp --- AlphaGo/Network.py | 280 +++++++++++++++++++++++--------------------- AlphaGo/game.py | 197 ++++++++++++++++++++++++++----- AlphaGo/strategy.py | 12 +- AlphaGo/test.py | 217 ++++++++++++++++++---------------- AlphaGo/utils.py | 133 ++------------------- GTP/game.py | 27 +++-- GTP/utils.py | 3 - 7 files changed, 460 insertions(+), 409 deletions(-) diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py index 696c20a..caf7710 100644 --- a/AlphaGo/Network.py +++ b/AlphaGo/Network.py @@ -3,6 +3,7 @@ import time import sys import numpy as np +import time import tensorflow as tf import tensorflow.contrib.layers as layers @@ -49,151 +50,162 @@ def value_heads(input, is_training): return h -x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) -is_training = tf.placeholder(tf.bool, shape=[]) -z = tf.placeholder(tf.float32, shape=[None, 1]) -pi = tf.placeholder(tf.float32, shape=[None, 362]) +class Network(object): + def __init__(self): + self.x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) + self.is_training = tf.placeholder(tf.bool, shape=[]) + self.z = tf.placeholder(tf.float32, shape=[None, 1]) + self.pi = tf.placeholder(tf.float32, shape=[None, 362]) + self.build_network() -h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, - normalizer_params={'is_training': is_training, 'updates_collections': tf.GraphKeys.UPDATE_OPS}, - weights_regularizer=layers.l2_regularizer(1e-4)) -for i in range(19): - h = residual_block(h, is_training) -v = value_heads(h, is_training) -p = policy_heads(h, is_training) -# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) -value_loss = tf.reduce_mean(tf.square(z - v)) -policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p)) + def build_network(self): + h = layers.conv2d(self.x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, + normalizer_params={'is_training': self.is_training, + 'updates_collections': tf.GraphKeys.UPDATE_OPS}, + weights_regularizer=layers.l2_regularizer(1e-4)) + for i in range(19): + h = residual_block(h, self.is_training) + self.v = value_heads(h, self.is_training) + self.p = policy_heads(h, self.is_training) + # loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) + self.value_loss = tf.reduce_mean(tf.square(self.z - self.v)) + self.policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.pi, logits=self.p)) -reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) -total_loss = value_loss + policy_loss + reg -# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) -update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) -with tf.control_dependencies(update_ops): - train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss) -var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) -saver = tf.train.Saver(max_to_keep=10, var_list=var_list) + self.reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) + self.total_loss = self.value_loss + self.policy_loss + self.reg + # train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) + self.update_ops 
= tf.get_collection(tf.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(self.update_ops): + self.train_op = tf.train.RMSPropOptimizer(1e-4).minimize(self.total_loss) + self.var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + self.saver = tf.train.Saver(max_to_keep=10, var_list=self.var_list) + + def train(self): + data_path = "/home/tongzheng/data/" + data_name = os.listdir("/home/tongzheng/data/") + epochs = 100 + batch_size = 128 + + result_path = "./checkpoints/" + with multi_gpu.create_session() as sess: + sess.run(tf.global_variables_initializer()) + ckpt_file = tf.train.latest_checkpoint(result_path) + if ckpt_file is not None: + print('Restoring model from {}...'.format(ckpt_file)) + self.saver.restore(sess, ckpt_file) + for epoch in range(epochs): + for name in data_name: + data = np.load(data_path + name) + boards = data["boards"] + wins = data["wins"] + ps = data["ps"] + print (boards.shape) + print (wins.shape) + print (ps.shape) + batch_num = boards.shape[0] // batch_size + index = np.arange(boards.shape[0]) + np.random.shuffle(index) + value_losses = [] + policy_losses = [] + regs = [] + time_train = -time.time() + for iter in range(batch_num): + lv, lp, r, value, prob, _ = sess.run( + [self.value_loss, self.policy_loss, self.reg, self.v, tf.nn.softmax(p), self.train_op], + feed_dict={self.x: boards[ + index[iter * batch_size:(iter + 1) * batch_size]], + self.z: wins[index[ + iter * batch_size:(iter + 1) * batch_size]], + self.pi: ps[index[ + iter * batch_size:(iter + 1) * batch_size]], + self.is_training: True}) + value_losses.append(lv) + policy_losses.append(lp) + regs.append(r) + if iter % 1 == 0: + print( + "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( + epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), + np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) + time_train = -time.time() + value_losses = [] + policy_losses = [] + regs = [] + if iter % 20 == 0: + save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) + self.saver.save(sess, result_path + save_path) + del data, boards, wins, ps -def train(): - data_path = "/home/tongzheng/data/" - data_name = os.listdir("/home/tongzheng/data/") - epochs = 100 - batch_size = 128 + # def forward(call_number): + # # checkpoint_path = "/home/yama/rl/tianshou/AlphaGo/checkpoints" + # checkpoint_path = "/home/jialian/stuGo/tianshou/stuGo/checkpoints/" + # board_file = np.genfromtxt("/home/jialian/stuGo/tianshou/leela-zero/src/mcts_nn_files/board_" + call_number, + # dtype='str'); + # human_board = np.zeros((17, 19, 19)) + # + # # TODO : is it ok to ignore the last channel? 
+ # for i in range(17): + # human_board[i] = np.array(list(board_file[i])).reshape(19, 19) + # # print("============================") + # # print("human board sum : " + str(np.sum(human_board[-1]))) + # # print("============================") + # # print(human_board) + # # print("============================") + # # rint(human_board) + # feed_board = human_board.transpose(1, 2, 0).reshape(1, 19, 19, 17) + # # print(feed_board[:,:,:,-1]) + # # print(feed_board.shape) + # + # # npz_board = np.load("/home/yama/rl/tianshou/AlphaGo/data/7f83928932f64a79bc1efdea268698ae.npz") + # # print(npz_board["boards"].shape) + # # feed_board = npz_board["boards"][10].reshape(-1, 19, 19, 17) + # ##print(feed_board) + # # show_board = feed_board[0].transpose(2, 0, 1) + # # print("board shape : ", show_board.shape) + # # print(show_board) + # + # itflag = False + # with multi_gpu.create_session() as sess: + # sess.run(tf.global_variables_initializer()) + # ckpt_file = tf.train.latest_checkpoint(checkpoint_path) + # if ckpt_file is not None: + # # print('Restoring model from {}...'.format(ckpt_file)) + # saver.restore(sess, ckpt_file) + # else: + # raise ValueError("No model loaded") + # res = sess.run([tf.nn.softmax(p), v], feed_dict={x: feed_board, is_training: itflag}) + # # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) + # # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) + # # print(np.argmax(res[0])) + # np.savetxt(sys.stdout, res[0][0], fmt="%.6f", newline=" ") + # np.savetxt(sys.stdout, res[1][0], fmt="%.6f", newline=" ") + # pv_file = "/home/jialian/stuGotianshou/leela-zero/src/mcts_nn_files/policy_value" + # np.savetxt(pv_file, np.concatenate((res[0][0], res[1][0])), fmt="%.6f", newline=" ") + # # np.savetxt(pv_file, res[1][0], fmt="%.6f", newline=" ") + # return res - result_path = "./checkpoints/" - with multi_gpu.create_session() as sess: - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(result_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) - for epoch in range(epochs): - for name in data_name: - data = np.load(data_path + name) - boards = data["boards"] - wins = data["wins"] - ps = data["ps"] - print (boards.shape) - print (wins.shape) - print (ps.shape) - batch_num = boards.shape[0] // batch_size - index = np.arange(boards.shape[0]) - np.random.shuffle(index) - value_losses = [] - policy_losses = [] - regs = [] - time_train = -time.time() - for iter in range(batch_num): - lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op], - feed_dict={x: boards[ - index[iter * batch_size:(iter + 1) * batch_size]], - z: wins[index[ - iter * batch_size:(iter + 1) * batch_size]], - pi: ps[index[ - iter * batch_size:(iter + 1) * batch_size]], - is_training: True}) - value_losses.append(lv) - policy_losses.append(lp) - regs.append(r) - if iter % 1 == 0: - print( - "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( - epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), - np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) - time_train = -time.time() - value_losses = [] - policy_losses = [] - regs = [] - if iter % 20 == 0: - save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) - saver.save(sess, result_path + save_path) - 
del data, boards, wins, ps - - -# def forward(call_number): -# # checkpoint_path = "/home/yama/rl/tianshou/AlphaGo/checkpoints" -# checkpoint_path = "/home/jialian/stuGo/tianshou/stuGo/checkpoints/" -# board_file = np.genfromtxt("/home/jialian/stuGo/tianshou/leela-zero/src/mcts_nn_files/board_" + call_number, -# dtype='str'); -# human_board = np.zeros((17, 19, 19)) -# -# # TODO : is it ok to ignore the last channel? -# for i in range(17): -# human_board[i] = np.array(list(board_file[i])).reshape(19, 19) -# # print("============================") -# # print("human board sum : " + str(np.sum(human_board[-1]))) -# # print("============================") -# # print(human_board) -# # print("============================") -# # rint(human_board) -# feed_board = human_board.transpose(1, 2, 0).reshape(1, 19, 19, 17) -# # print(feed_board[:,:,:,-1]) -# # print(feed_board.shape) -# -# # npz_board = np.load("/home/yama/rl/tianshou/AlphaGo/data/7f83928932f64a79bc1efdea268698ae.npz") -# # print(npz_board["boards"].shape) -# # feed_board = npz_board["boards"][10].reshape(-1, 19, 19, 17) -# ##print(feed_board) -# # show_board = feed_board[0].transpose(2, 0, 1) -# # print("board shape : ", show_board.shape) -# # print(show_board) -# -# itflag = False -# with multi_gpu.create_session() as sess: -# sess.run(tf.global_variables_initializer()) -# ckpt_file = tf.train.latest_checkpoint(checkpoint_path) -# if ckpt_file is not None: -# # print('Restoring model from {}...'.format(ckpt_file)) -# saver.restore(sess, ckpt_file) -# else: -# raise ValueError("No model loaded") -# res = sess.run([tf.nn.softmax(p), v], feed_dict={x: feed_board, is_training: itflag}) -# # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) -# # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) -# # print(np.argmax(res[0])) -# np.savetxt(sys.stdout, res[0][0], fmt="%.6f", newline=" ") -# np.savetxt(sys.stdout, res[1][0], fmt="%.6f", newline=" ") -# pv_file = "/home/jialian/stuGotianshou/leela-zero/src/mcts_nn_files/policy_value" -# np.savetxt(pv_file, np.concatenate((res[0][0], res[1][0])), fmt="%.6f", newline=" ") -# # np.savetxt(pv_file, res[1][0], fmt="%.6f", newline=" ") -# return res - -def forward(state): - checkpoint_path = "/home/tongzheng/tianshou/AlphaGo/checkpoints/" - with multi_gpu.create_session() as sess: + def forward(self): + checkpoint_path = "/home/tongzheng/tianshou/AlphaGo/checkpoints/" + sess = multi_gpu.create_session() sess.run(tf.global_variables_initializer()) ckpt_file = tf.train.latest_checkpoint(checkpoint_path) if ckpt_file is not None: print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) + self.saver.restore(sess, ckpt_file) + print('Successfully loaded') else: raise ValueError("No model loaded") - prior, value = sess.run([tf.nn.softmax(p), v], feed_dict={x: state, is_training: False}) - return prior, value + # prior, value = sess.run([tf.nn.softmax(p), v], feed_dict={x: state, is_training: False}) + # return prior, value + return sess if __name__ == '__main__': - np.set_printoptions(threshold='nan') - # time.sleep(2) - forward(sys.argv[1]) + state = np.random.randint(0, 1, [1, 19, 19, 17]) + net = Network() + sess = net.forward() + start = time.time() + for i in range(100): + sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False}) + print("Step {}, Cumulative time {}".format(i, time.time() - start)) diff 
--git a/AlphaGo/game.py b/AlphaGo/game.py index aee641f..192697a 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -1,15 +1,138 @@ # -*- coding: utf-8 -*- # vim:fenc=utf-8 # $File: game.py -# $Date: Fri Nov 17 15:0745 2017 +0800 +# $Date: Tue Nov 28 14:4726 2017 +0800 # $Author: renyong15 © # - -import numpy as np +from __future__ import print_function import utils +import copy +import tensorflow as tf +from collections import deque + import Network from strategy import strategy -from collections import deque + +''' +(1, 1) is considered as the upper left corner of the board, +(size, 1) is the lower left +''' + +DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]] + + +class Executor: + def __init__(self, **kwargs): + self.game = kwargs['game'] + + def _bfs(self, vertex, color, block, status, alive_break): + block.append(vertex) + status[self.game._flatten(vertex)] = True + nei = self._neighbor(vertex) + for n in nei: + if not status[self.game._flatten(n)]: + if self.game.board[self.game._flatten(n)] == color: + self._bfs(n, color, block, status, alive_break) + + def _find_block(self, vertex, alive_break=False): + block = [] + status = [False] * (self.game.size * self.game.size) + color = self.game.board[self.game._flatten(vertex)] + self._bfs(vertex, color, block, status, alive_break) + + for b in block: + for n in self._neighbor(b): + if self.game.board[self.game._flatten(n)] == utils.EMPTY: + return False, block + return True, block + + def _is_qi(self, color, vertex): + nei = self._neighbor(vertex) + for n in nei: + if self.game.board[self.game._flatten(n)] == utils.EMPTY: + return True + + self.game.board[self.game._flatten(vertex)] = color + for n in nei: + if self.game.board[self.game._flatten(n)] == utils.another_color(color): + can_kill, block = self._find_block(n) + if can_kill: + self.game.board[self.game._flatten(vertex)] = utils.EMPTY + return True + + ### can not suicide + can_kill, block = self._find_block(vertex) + if can_kill: + self.game.board[self.game._flatten(vertex)] = utils.EMPTY + return False + + self.game.board[self.game._flatten(vertex)] = utils.EMPTY + return True + + def _check_global_isomorphous(self, color, vertex): + ##backup + _board = copy.copy(self.game.board) + self.game.board[self.game._flatten(vertex)] = color + self._process_board(color, vertex) + if self.game.board in self.game.history: + res = True + else: + res = False + + self.game.board = _board + return res + + def _in_board(self, vertex): + x, y = vertex + if x < 1 or x > self.game.size: return False + if y < 1 or y > self.game.size: return False + return True + + def _neighbor(self, vertex): + x, y = vertex + nei = [] + for d in DELTA: + _x = x + d[0] + _y = y + d[1] + if self._in_board((_x, _y)): + nei.append((_x, _y)) + return nei + + def _process_board(self, color, vertex): + nei = self._neighbor(vertex) + for n in nei: + if self.game.board[self.game._flatten(n)] == utils.another_color(color): + can_kill, block = self._find_block(n, alive_break=True) + if can_kill: + for b in block: + self.game.board[self.game._flatten(b)] = utils.EMPTY + + def is_valid(self, color, vertex): + ### in board + if not self._in_board(vertex): + return False + + ### already have stone + if not self.game.board[self.game._flatten(vertex)] == utils.EMPTY: + return False + + ### check if it is qi + if not self._is_qi(color, vertex): + return False + + if self._check_global_isomorphous(color, vertex): + return False + + return True + + def do_move(self, color, vertex): + if not self.is_valid(color, vertex): + return False 
+ self.game.board[self.game._flatten(vertex)] = color + self._process_board(color, vertex) + self.game.history.append(copy.copy(self.game.board)) + self.game.past.append(copy.copy(self.game.board)) + return True class Game: @@ -17,14 +140,16 @@ class Game: self.size = size self.komi = 6.5 self.board = [utils.EMPTY] * (self.size * self.size) - self.strategy = strategy(Network.forward) - self.history = deque(maxlen=8) + self.strategy = strategy() + self.executor = Executor(game=self) + self.history = [] + self.past = deque(maxlen=8) for i in range(8): - self.history.append(self.board) + self.past.append(self.board) def _flatten(self, vertex): x, y = vertex - return (x - 1) * self.size + (y - 1) + return (y - 1) * self.size + (x - 1) def clear(self): self.board = [utils.EMPTY] * (self.size * self.size) @@ -36,34 +161,42 @@ class Game: def set_komi(self, k): self.komi = k + def check_valid(self, vertex): + return True + def do_move(self, color, vertex): if vertex == utils.PASS: return True - - id_ = self._flatten(vertex) - if self.board[id_] == utils.EMPTY: - self.board[id_] = color - self.history.append(self.board) - return True - else: - return False - - def step_forward(self, state, action): - if state[0, 0, 0, -1] == 1: - color = 1 - else: - color = -1 - if action == 361: - vertex = (0, 0) - else: - vertex = (action / 19 + 1, action % 19) - self.do_move(color, vertex) - new_state = np.concatenate([state[:, :, :, 1:8], self.board == 1, state[:, :, :, 9:16], 1 - state[:, :, :, -1]], - axis=3) - return new_state, 0 + res = self.executor.do_move(color, vertex) + return res def gen_move(self, color): - move = self.strategy.gen_move(self.history, color) + # move = self.strategy.gen_move(color) + # return move + move = self.strategy.gen_move(self.past, color) + self.do_move(color, move) return move - # return utils.PASS + def status2symbol(self, s): + pool = {utils.WHITE: '#', utils.EMPTY: '.', utils.BLACK: '*', utils.FILL: 'F', utils.UNKNOWN: '?'} + return pool[s] + + def show_board(self): + row = [i for i in range(1, 20)] + col = ' abcdefghijklmnopqrstuvwxyz' + print(' ', end='') + for j in range(self.size + 1): + print(col[j], end=' ') + print('') + for i in range(self.size): + print(row[i], end=' ') + if row[i] < 10: + print(' ', end='') + for j in range(self.size): + print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ') + print('') + + +if __name__ == "__main__": + g = Game() + g.show_board() diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py index 3235cf2..99a8e4d 100644 --- a/AlphaGo/strategy.py +++ b/AlphaGo/strategy.py @@ -1,5 +1,8 @@ import numpy as np import utils +import time +import Network +import tensorflow as tf from collections import deque from tianshou.core.mcts.mcts import MCTS @@ -47,9 +50,12 @@ class GoEnv: class strategy(object): - def __init__(self, evaluator): + def __init__(self): self.simulator = GoEnv() - self.evaluator = evaluator + self.net = Network.Network() + self.sess = self.net.forward() + self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], + feed_dict={self.net.x: state, self.net.is_training: False}) def data_process(self, history, color): state = np.zeros([1, 19, 19, 17]) @@ -67,7 +73,7 @@ class strategy(object): self.simulator.board = history[-1] state = self.data_process(history, color) prior = self.evaluator(state)[0] - mcts = MCTS(self.simulator, self.evaluator, state, 362, prior, inverse=True, max_step=20) + mcts = MCTS(self.simulator, self.evaluator, state, 362, prior, inverse=True, 
max_step=100) temp = 1 p = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) choice = np.random.choice(362, 1, p=p).tolist()[0] diff --git a/AlphaGo/test.py b/AlphaGo/test.py index 1d3a3fc..59c5a26 100644 --- a/AlphaGo/test.py +++ b/AlphaGo/test.py @@ -33,112 +33,127 @@ print(res) res = e.run_cmd('7 play BLACK C3') print(res) -res = e.run_cmd('play BLACK C4') -res = e.run_cmd('play BLACK C5') -res = e.run_cmd('play BLACK C6') -res = e.run_cmd('play BLACK D3') -print(res) - -res = e.run_cmd('8 genmove BLACK') -print(res) - -<<<<<<< HEAD:AlphaGo/test.py -res = e.run_cmd('9 genmove WHITE') -print(res) -======= -#g.show_board() -print(g.check_valid((10, 9))) -print(g.executor._neighbor((1,1))) -print(g.do_move(utils.WHITE, (4, 6))) -#g.show_board() +# res = e.run_cmd('play BLACK C4') +# res = e.run_cmd('play BLACK C5') +# res = e.run_cmd('play BLACK C6') +# res = e.run_cmd('play BLACK D3') +# print(res) -res = e.run_cmd('play BLACK L10') -res = e.run_cmd('play BLACK L11') -res = e.run_cmd('play BLACK L12') -res = e.run_cmd('play BLACK L13') -res = e.run_cmd('play BLACK L14') -res = e.run_cmd('play BLACK m15') -res = e.run_cmd('play BLACK m9') -res = e.run_cmd('play BLACK C9') -res = e.run_cmd('play BLACK D9') -res = e.run_cmd('play BLACK E9') -res = e.run_cmd('play BLACK F9') -res = e.run_cmd('play BLACK G9') -res = e.run_cmd('play BLACK H9') -res = e.run_cmd('play BLACK I9') - -res = e.run_cmd('play BLACK N9') -res = e.run_cmd('play BLACK N15') -res = e.run_cmd('play BLACK O10') -res = e.run_cmd('play BLACK O11') -res = e.run_cmd('play BLACK O12') -res = e.run_cmd('play BLACK O13') -res = e.run_cmd('play BLACK O14') -res = e.run_cmd('play BLACK M12') - -res = e.run_cmd('play WHITE M10') -res = e.run_cmd('play WHITE M11') -res = e.run_cmd('play WHITE N10') -res = e.run_cmd('play WHITE N11') - -res = e.run_cmd('play WHITE M13') -res = e.run_cmd('play WHITE M14') -res = e.run_cmd('play WHITE N13') -res = e.run_cmd('play WHITE N14') -print(res) - -res = e.run_cmd('play BLACK N12') -print(res) -#g.show_board() - -res = e.run_cmd('play BLACK P16') -res = e.run_cmd('play BLACK P17') -res = e.run_cmd('play BLACK P18') -res = e.run_cmd('play BLACK P19') -res = e.run_cmd('play BLACK Q16') -res = e.run_cmd('play BLACK R16') -res = e.run_cmd('play BLACK S16') - -res = e.run_cmd('play WHITE S18') -res = e.run_cmd('play WHITE S17') -res = e.run_cmd('play WHITE Q19') -res = e.run_cmd('play WHITE Q18') -res = e.run_cmd('play WHITE Q17') -res = e.run_cmd('play WHITE R18') -res = e.run_cmd('play WHITE R17') -res = e.run_cmd('play BLACK S19') -print(res) -#g.show_board() - -res = e.run_cmd('play WHITE R19') -g.show_board() - -res = e.run_cmd('play BLACK S19') +res = e.run_cmd('8 genmove WHITE') print(res) g.show_board() -res = e.run_cmd('play BLACK S19') -print(res) +# res = e.run_cmd('8 genmove BLACK') +# print(res) +# g.show_board() +# +# res = e.run_cmd('8 genmove WHITE') +# print(res) +# g.show_board() +# +# res = e.run_cmd('8 genmove BLACK') +# print(res) +# g.show_board() +# +# res = e.run_cmd('8 genmove WHITE') +# print(res) +# g.show_board() +# #g.show_board() +# print(g.check_valid((10, 9))) +# print(g.executor._neighbor((1,1))) +# print(g.do_move(utils.WHITE, (4, 6))) +# #g.show_board() +# +# +# res = e.run_cmd('play BLACK L10') +# res = e.run_cmd('play BLACK L11') +# res = e.run_cmd('play BLACK L12') +# res = e.run_cmd('play BLACK L13') +# res = e.run_cmd('play BLACK L14') +# res = e.run_cmd('play BLACK m15') +# res = e.run_cmd('play BLACK m9') +# res = e.run_cmd('play BLACK C9') +# res = 
e.run_cmd('play BLACK D9') +# res = e.run_cmd('play BLACK E9') +# res = e.run_cmd('play BLACK F9') +# res = e.run_cmd('play BLACK G9') +# res = e.run_cmd('play BLACK H9') +# res = e.run_cmd('play BLACK I9') +# +# res = e.run_cmd('play BLACK N9') +# res = e.run_cmd('play BLACK N15') +# res = e.run_cmd('play BLACK O10') +# res = e.run_cmd('play BLACK O11') +# res = e.run_cmd('play BLACK O12') +# res = e.run_cmd('play BLACK O13') +# res = e.run_cmd('play BLACK O14') +# res = e.run_cmd('play BLACK M12') +# +# res = e.run_cmd('play WHITE M10') +# res = e.run_cmd('play WHITE M11') +# res = e.run_cmd('play WHITE N10') +# res = e.run_cmd('play WHITE N11') +# +# res = e.run_cmd('play WHITE M13') +# res = e.run_cmd('play WHITE M14') +# res = e.run_cmd('play WHITE N13') +# res = e.run_cmd('play WHITE N14') +# print(res) +# +# res = e.run_cmd('play BLACK N12') +# print(res) +# #g.show_board() +# +# res = e.run_cmd('play BLACK P16') +# res = e.run_cmd('play BLACK P17') +# res = e.run_cmd('play BLACK P18') +# res = e.run_cmd('play BLACK P19') +# res = e.run_cmd('play BLACK Q16') +# res = e.run_cmd('play BLACK R16') +# res = e.run_cmd('play BLACK S16') +# +# res = e.run_cmd('play WHITE S18') +# res = e.run_cmd('play WHITE S17') +# res = e.run_cmd('play WHITE Q19') +# res = e.run_cmd('play WHITE Q18') +# res = e.run_cmd('play WHITE Q17') +# res = e.run_cmd('play WHITE R18') +# res = e.run_cmd('play WHITE R17') +# res = e.run_cmd('play BLACK S19') +# print(res) +# #g.show_board() +# +# res = e.run_cmd('play WHITE R19') +# g.show_board() +# +# res = e.run_cmd('play BLACK S19') +# print(res) +# g.show_board() +# +# res = e.run_cmd('play BLACK S19') +# print(res) +# +# +# res = e.run_cmd('play BLACK E17') +# res = e.run_cmd('play BLACK F16') +# res = e.run_cmd('play BLACK F18') +# res = e.run_cmd('play BLACK G17') +# res = e.run_cmd('play WHITE G16') +# res = e.run_cmd('play WHITE G18') +# res = e.run_cmd('play WHITE H17') +# g.show_board() +# +# res = e.run_cmd('play WHITE F17') +# g.show_board() +# +# res = e.run_cmd('play BLACK G17') +# print(res) +# g.show_board() +# +# res = e.run_cmd('play BLACK G19') +# res = e.run_cmd('play BLACK G17') +# g.show_board() -res = e.run_cmd('play BLACK E17') -res = e.run_cmd('play BLACK F16') -res = e.run_cmd('play BLACK F18') -res = e.run_cmd('play BLACK G17') -res = e.run_cmd('play WHITE G16') -res = e.run_cmd('play WHITE G18') -res = e.run_cmd('play WHITE H17') -g.show_board() -res = e.run_cmd('play WHITE F17') -g.show_board() - -res = e.run_cmd('play BLACK G17') -print(res) -g.show_board() - -res = e.run_cmd('play BLACK G19') -res = e.run_cmd('play BLACK G17') -g.show_board() ->>>>>>> gtp:GTP/test.py diff --git a/AlphaGo/utils.py b/AlphaGo/utils.py index 8075381..5437950 100644 --- a/AlphaGo/utils.py +++ b/AlphaGo/utils.py @@ -1,137 +1,20 @@ # -*- coding: utf-8 -*- # vim:fenc=utf-8 # $File: utils.py -# $Date: Fri Nov 17 10:2407 2017 +0800 +# $Date: Mon Nov 27 18:2755 2017 +0800 # $Author: renyong15 © # WHITE = -1 -BLACK = +1 EMPTY = 0 +BLACK = +1 +FILL = +2 +KO = +3 +UNKNOWN = +4 -PASS = (0, 0) +PASS = (0,0) RESIGN = "resign" -from collections import defaultdict -import functools -import itertools -import operator -import random -import re -import time -import gtp -import go - -KGS_COLUMNS = 'ABCDEFGHJKLMNOPQRST' -SGF_COLUMNS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" - - -def parse_sgf_to_flat(sgf): - return flatten_coords(parse_sgf_coords(sgf)) - - -def flatten_coords(c): - return go.N * c[0] + c[1] - - -def unflatten_coords(f): - return divmod(f, 
go.N) - - -def parse_sgf_coords(s): - 'Interprets coords. aa is top left corner; sa is top right corner' - if s is None or s == '': - return None - return SGF_COLUMNS.index(s[1]), SGF_COLUMNS.index(s[0]) - - -def unparse_sgf_coords(c): - if c is None: - return '' - return SGF_COLUMNS[c[1]] + SGF_COLUMNS[c[0]] - - -def parse_kgs_coords(s): - 'Interprets coords. A1 is bottom left; A9 is top left.' - if s == 'pass': - return None - s = s.upper() - col = KGS_COLUMNS.index(s[0]) - row_from_bottom = int(s[1:]) - 1 - return go.N - row_from_bottom - 1, col - - -def parse_pygtp_coords(vertex): - 'Interprets coords. (1, 1) is bottom left; (1, 9) is top left.' - if vertex in (gtp.PASS, gtp.RESIGN): - return None - return go.N - vertex[1], vertex[0] - 1 - - -def unparse_pygtp_coords(c): - if c is None: - return gtp.PASS - return c[1] + 1, go.N - c[0] - - -def parse_game_result(result): - if re.match(r'[bB]\+', result): - return go.BLACK - elif re.match(r'[wW]\+', result): - return go.WHITE - else: - return None - - -def product(numbers): - return functools.reduce(operator.mul, numbers) - - -def take_n(n, iterable): - return list(itertools.islice(iterable, n)) - - -def iter_chunks(chunk_size, iterator): - while True: - next_chunk = take_n(chunk_size, iterator) - # If len(iterable) % chunk_size == 0, don't return an empty chunk. - if next_chunk: - yield next_chunk - else: - break - - -def shuffler(iterator, pool_size=10 ** 5, refill_threshold=0.9): - yields_between_refills = round(pool_size * (1 - refill_threshold)) - # initialize pool; this step may or may not exhaust the iterator. - pool = take_n(pool_size, iterator) - while True: - random.shuffle(pool) - for i in range(yields_between_refills): - yield pool.pop() - next_batch = take_n(yields_between_refills, iterator) - if not next_batch: - break - pool.extend(next_batch) - # finish consuming whatever's left - no need for further randomization. 
- yield pool - - -class timer(object): - all_times = defaultdict(float) - - def __init__(self, label): - self.label = label - - def __enter__(self): - self.tick = time.time() - - def __exit__(self, type, value, traceback): - self.tock = time.time() - self.all_times[self.label] += self.tock - self.tick - - @classmethod - def print_times(cls): - for k, v in cls.all_times.items(): - print("%s: %.3f" % (k, v)) +def another_color(color): + return color * -1 diff --git a/GTP/game.py b/GTP/game.py index 76b0750..2d76a7d 100644 --- a/GTP/game.py +++ b/GTP/game.py @@ -4,7 +4,7 @@ # $Date: Tue Nov 28 14:4726 2017 +0800 # $Author: renyong15 © # - +from __future__ import print_function import utils import copy @@ -173,15 +173,20 @@ class Game: def show_board(self): row = [i for i in range(1, 20)] col = ' abcdefghijklmnopqrstuvwxyz' - - for i in range(self.size): - print(row[i]) - if row[i] < 10: - print(' ') - for j in range(self.size): - print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))])) - print('\n') - print(' ') + print(' ', end='') for j in range(self.size + 1): - print(col[j]) + print(col[j], end=' ') + print('\n') + for i in range(self.size): + print(row[i], end=' ') + if row[i] < 10: + print(' ', end='') + for j in range(self.size): + print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ') + print('\n') + + +if __name__=="__main__": + g = Game() + g.show_board() \ No newline at end of file diff --git a/GTP/utils.py b/GTP/utils.py index b7ce00c..c4114a3 100644 --- a/GTP/utils.py +++ b/GTP/utils.py @@ -17,6 +17,3 @@ RESIGN = "resign" def another_color(color): return color * -1 - - -
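
Usage sketch (not part of the patch): a minimal example of how the refactored Game class added in AlphaGo/game.py appears intended to be driven, pieced together from the methods the patch introduces (do_move, gen_move, show_board, the Executor validity checks) and the constants in AlphaGo/utils.py. The import path, the default 19x19 constructor, and the availability of a trained checkpoint for strategy()/Network.forward() are assumptions; gen_move will raise "No model loaded" if no checkpoint exists under the hard-coded path.

    # hypothetical driver script, assuming it lives next to AlphaGo/game.py
    import utils            # provides BLACK=+1, WHITE=-1, PASS=(0, 0)
    from game import Game   # the Game class added by this patch

    game = Game()           # the patch's __main__ block constructs it with defaults
    game.set_komi(6.5)

    # Place a stone directly. Vertices are (x, y) with (1, 1) the upper-left
    # corner, per the comment in game.py; Executor.is_valid enforces the
    # liberty (qi) and global-isomorphism checks introduced by this patch.
    game.do_move(utils.BLACK, (3, 3))

    # Ask the MCTS-backed strategy for WHITE's reply. Note that gen_move both
    # returns the chosen vertex and plays it on the board, and it depends on
    # strategy() having loaded a Network checkpoint from disk.
    move = game.gen_move(utils.WHITE)

    game.show_board()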