From 13738f13c4036bb18655d7bef68bc5d5e355fa24 Mon Sep 17 00:00:00 2001 From: rtz19970824 Date: Tue, 28 Nov 2017 17:00:10 +0800 Subject: [PATCH] merge gtp --- AlphaGo/Network.py | 280 +++++++++++++++++++++++--------------------- AlphaGo/game.py | 197 ++++++++++++++++++++++++++----- AlphaGo/strategy.py | 12 +- AlphaGo/test.py | 217 ++++++++++++++++++---------------- AlphaGo/utils.py | 133 ++------------------- GTP/game.py | 27 +++-- GTP/utils.py | 3 - 7 files changed, 460 insertions(+), 409 deletions(-) diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py index 696c20a..caf7710 100644 --- a/AlphaGo/Network.py +++ b/AlphaGo/Network.py @@ -3,6 +3,7 @@ import time import sys import numpy as np +import time import tensorflow as tf import tensorflow.contrib.layers as layers @@ -49,151 +50,162 @@ def value_heads(input, is_training): return h -x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) -is_training = tf.placeholder(tf.bool, shape=[]) -z = tf.placeholder(tf.float32, shape=[None, 1]) -pi = tf.placeholder(tf.float32, shape=[None, 362]) +class Network(object): + def __init__(self): + self.x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) + self.is_training = tf.placeholder(tf.bool, shape=[]) + self.z = tf.placeholder(tf.float32, shape=[None, 1]) + self.pi = tf.placeholder(tf.float32, shape=[None, 362]) + self.build_network() -h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, - normalizer_params={'is_training': is_training, 'updates_collections': tf.GraphKeys.UPDATE_OPS}, - weights_regularizer=layers.l2_regularizer(1e-4)) -for i in range(19): - h = residual_block(h, is_training) -v = value_heads(h, is_training) -p = policy_heads(h, is_training) -# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) -value_loss = tf.reduce_mean(tf.square(z - v)) -policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p)) + def build_network(self): + h = layers.conv2d(self.x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, + normalizer_params={'is_training': self.is_training, + 'updates_collections': tf.GraphKeys.UPDATE_OPS}, + weights_regularizer=layers.l2_regularizer(1e-4)) + for i in range(19): + h = residual_block(h, self.is_training) + self.v = value_heads(h, self.is_training) + self.p = policy_heads(h, self.is_training) + # loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) + self.value_loss = tf.reduce_mean(tf.square(self.z - self.v)) + self.policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.pi, logits=self.p)) -reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) -total_loss = value_loss + policy_loss + reg -# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) -update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) -with tf.control_dependencies(update_ops): - train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss) -var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) -saver = tf.train.Saver(max_to_keep=10, var_list=var_list) + self.reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) + self.total_loss = self.value_loss + self.policy_loss + self.reg + # train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) + self.update_ops 
= tf.get_collection(tf.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(self.update_ops): + self.train_op = tf.train.RMSPropOptimizer(1e-4).minimize(self.total_loss) + self.var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + self.saver = tf.train.Saver(max_to_keep=10, var_list=self.var_list) + + def train(self): + data_path = "/home/tongzheng/data/" + data_name = os.listdir("/home/tongzheng/data/") + epochs = 100 + batch_size = 128 + + result_path = "./checkpoints/" + with multi_gpu.create_session() as sess: + sess.run(tf.global_variables_initializer()) + ckpt_file = tf.train.latest_checkpoint(result_path) + if ckpt_file is not None: + print('Restoring model from {}...'.format(ckpt_file)) + self.saver.restore(sess, ckpt_file) + for epoch in range(epochs): + for name in data_name: + data = np.load(data_path + name) + boards = data["boards"] + wins = data["wins"] + ps = data["ps"] + print (boards.shape) + print (wins.shape) + print (ps.shape) + batch_num = boards.shape[0] // batch_size + index = np.arange(boards.shape[0]) + np.random.shuffle(index) + value_losses = [] + policy_losses = [] + regs = [] + time_train = -time.time() + for iter in range(batch_num): + lv, lp, r, value, prob, _ = sess.run( + [self.value_loss, self.policy_loss, self.reg, self.v, tf.nn.softmax(p), self.train_op], + feed_dict={self.x: boards[ + index[iter * batch_size:(iter + 1) * batch_size]], + self.z: wins[index[ + iter * batch_size:(iter + 1) * batch_size]], + self.pi: ps[index[ + iter * batch_size:(iter + 1) * batch_size]], + self.is_training: True}) + value_losses.append(lv) + policy_losses.append(lp) + regs.append(r) + if iter % 1 == 0: + print( + "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( + epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), + np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) + time_train = -time.time() + value_losses = [] + policy_losses = [] + regs = [] + if iter % 20 == 0: + save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) + self.saver.save(sess, result_path + save_path) + del data, boards, wins, ps -def train(): - data_path = "/home/tongzheng/data/" - data_name = os.listdir("/home/tongzheng/data/") - epochs = 100 - batch_size = 128 + # def forward(call_number): + # # checkpoint_path = "/home/yama/rl/tianshou/AlphaGo/checkpoints" + # checkpoint_path = "/home/jialian/stuGo/tianshou/stuGo/checkpoints/" + # board_file = np.genfromtxt("/home/jialian/stuGo/tianshou/leela-zero/src/mcts_nn_files/board_" + call_number, + # dtype='str'); + # human_board = np.zeros((17, 19, 19)) + # + # # TODO : is it ok to ignore the last channel? 
+ # for i in range(17): + # human_board[i] = np.array(list(board_file[i])).reshape(19, 19) + # # print("============================") + # # print("human board sum : " + str(np.sum(human_board[-1]))) + # # print("============================") + # # print(human_board) + # # print("============================") + # # rint(human_board) + # feed_board = human_board.transpose(1, 2, 0).reshape(1, 19, 19, 17) + # # print(feed_board[:,:,:,-1]) + # # print(feed_board.shape) + # + # # npz_board = np.load("/home/yama/rl/tianshou/AlphaGo/data/7f83928932f64a79bc1efdea268698ae.npz") + # # print(npz_board["boards"].shape) + # # feed_board = npz_board["boards"][10].reshape(-1, 19, 19, 17) + # ##print(feed_board) + # # show_board = feed_board[0].transpose(2, 0, 1) + # # print("board shape : ", show_board.shape) + # # print(show_board) + # + # itflag = False + # with multi_gpu.create_session() as sess: + # sess.run(tf.global_variables_initializer()) + # ckpt_file = tf.train.latest_checkpoint(checkpoint_path) + # if ckpt_file is not None: + # # print('Restoring model from {}...'.format(ckpt_file)) + # saver.restore(sess, ckpt_file) + # else: + # raise ValueError("No model loaded") + # res = sess.run([tf.nn.softmax(p), v], feed_dict={x: feed_board, is_training: itflag}) + # # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) + # # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) + # # print(np.argmax(res[0])) + # np.savetxt(sys.stdout, res[0][0], fmt="%.6f", newline=" ") + # np.savetxt(sys.stdout, res[1][0], fmt="%.6f", newline=" ") + # pv_file = "/home/jialian/stuGotianshou/leela-zero/src/mcts_nn_files/policy_value" + # np.savetxt(pv_file, np.concatenate((res[0][0], res[1][0])), fmt="%.6f", newline=" ") + # # np.savetxt(pv_file, res[1][0], fmt="%.6f", newline=" ") + # return res - result_path = "./checkpoints/" - with multi_gpu.create_session() as sess: - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(result_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) - for epoch in range(epochs): - for name in data_name: - data = np.load(data_path + name) - boards = data["boards"] - wins = data["wins"] - ps = data["ps"] - print (boards.shape) - print (wins.shape) - print (ps.shape) - batch_num = boards.shape[0] // batch_size - index = np.arange(boards.shape[0]) - np.random.shuffle(index) - value_losses = [] - policy_losses = [] - regs = [] - time_train = -time.time() - for iter in range(batch_num): - lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op], - feed_dict={x: boards[ - index[iter * batch_size:(iter + 1) * batch_size]], - z: wins[index[ - iter * batch_size:(iter + 1) * batch_size]], - pi: ps[index[ - iter * batch_size:(iter + 1) * batch_size]], - is_training: True}) - value_losses.append(lv) - policy_losses.append(lp) - regs.append(r) - if iter % 1 == 0: - print( - "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( - epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), - np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) - time_train = -time.time() - value_losses = [] - policy_losses = [] - regs = [] - if iter % 20 == 0: - save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) - saver.save(sess, result_path + save_path) - 
del data, boards, wins, ps - - -# def forward(call_number): -# # checkpoint_path = "/home/yama/rl/tianshou/AlphaGo/checkpoints" -# checkpoint_path = "/home/jialian/stuGo/tianshou/stuGo/checkpoints/" -# board_file = np.genfromtxt("/home/jialian/stuGo/tianshou/leela-zero/src/mcts_nn_files/board_" + call_number, -# dtype='str'); -# human_board = np.zeros((17, 19, 19)) -# -# # TODO : is it ok to ignore the last channel? -# for i in range(17): -# human_board[i] = np.array(list(board_file[i])).reshape(19, 19) -# # print("============================") -# # print("human board sum : " + str(np.sum(human_board[-1]))) -# # print("============================") -# # print(human_board) -# # print("============================") -# # rint(human_board) -# feed_board = human_board.transpose(1, 2, 0).reshape(1, 19, 19, 17) -# # print(feed_board[:,:,:,-1]) -# # print(feed_board.shape) -# -# # npz_board = np.load("/home/yama/rl/tianshou/AlphaGo/data/7f83928932f64a79bc1efdea268698ae.npz") -# # print(npz_board["boards"].shape) -# # feed_board = npz_board["boards"][10].reshape(-1, 19, 19, 17) -# ##print(feed_board) -# # show_board = feed_board[0].transpose(2, 0, 1) -# # print("board shape : ", show_board.shape) -# # print(show_board) -# -# itflag = False -# with multi_gpu.create_session() as sess: -# sess.run(tf.global_variables_initializer()) -# ckpt_file = tf.train.latest_checkpoint(checkpoint_path) -# if ckpt_file is not None: -# # print('Restoring model from {}...'.format(ckpt_file)) -# saver.restore(sess, ckpt_file) -# else: -# raise ValueError("No model loaded") -# res = sess.run([tf.nn.softmax(p), v], feed_dict={x: feed_board, is_training: itflag}) -# # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) -# # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) -# # print(np.argmax(res[0])) -# np.savetxt(sys.stdout, res[0][0], fmt="%.6f", newline=" ") -# np.savetxt(sys.stdout, res[1][0], fmt="%.6f", newline=" ") -# pv_file = "/home/jialian/stuGotianshou/leela-zero/src/mcts_nn_files/policy_value" -# np.savetxt(pv_file, np.concatenate((res[0][0], res[1][0])), fmt="%.6f", newline=" ") -# # np.savetxt(pv_file, res[1][0], fmt="%.6f", newline=" ") -# return res - -def forward(state): - checkpoint_path = "/home/tongzheng/tianshou/AlphaGo/checkpoints/" - with multi_gpu.create_session() as sess: + def forward(self): + checkpoint_path = "/home/tongzheng/tianshou/AlphaGo/checkpoints/" + sess = multi_gpu.create_session() sess.run(tf.global_variables_initializer()) ckpt_file = tf.train.latest_checkpoint(checkpoint_path) if ckpt_file is not None: print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) + self.saver.restore(sess, ckpt_file) + print('Successfully loaded') else: raise ValueError("No model loaded") - prior, value = sess.run([tf.nn.softmax(p), v], feed_dict={x: state, is_training: False}) - return prior, value + # prior, value = sess.run([tf.nn.softmax(p), v], feed_dict={x: state, is_training: False}) + # return prior, value + return sess if __name__ == '__main__': - np.set_printoptions(threshold='nan') - # time.sleep(2) - forward(sys.argv[1]) + state = np.random.randint(0, 1, [1, 19, 19, 17]) + net = Network() + sess = net.forward() + start = time.time() + for i in range(100): + sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False}) + print("Step {}, Cumulative time {}".format(i, time.time() - start)) diff 
--git a/AlphaGo/game.py b/AlphaGo/game.py index aee641f..192697a 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -1,15 +1,138 @@ # -*- coding: utf-8 -*- # vim:fenc=utf-8 # $File: game.py -# $Date: Fri Nov 17 15:0745 2017 +0800 +# $Date: Tue Nov 28 14:4726 2017 +0800 # $Author: renyong15 © # - -import numpy as np +from __future__ import print_function import utils +import copy +import tensorflow as tf +from collections import deque + import Network from strategy import strategy -from collections import deque + +''' +(1, 1) is considered as the upper left corner of the board, +(size, 1) is the lower left +''' + +DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]] + + +class Executor: + def __init__(self, **kwargs): + self.game = kwargs['game'] + + def _bfs(self, vertex, color, block, status, alive_break): + block.append(vertex) + status[self.game._flatten(vertex)] = True + nei = self._neighbor(vertex) + for n in nei: + if not status[self.game._flatten(n)]: + if self.game.board[self.game._flatten(n)] == color: + self._bfs(n, color, block, status, alive_break) + + def _find_block(self, vertex, alive_break=False): + block = [] + status = [False] * (self.game.size * self.game.size) + color = self.game.board[self.game._flatten(vertex)] + self._bfs(vertex, color, block, status, alive_break) + + for b in block: + for n in self._neighbor(b): + if self.game.board[self.game._flatten(n)] == utils.EMPTY: + return False, block + return True, block + + def _is_qi(self, color, vertex): + nei = self._neighbor(vertex) + for n in nei: + if self.game.board[self.game._flatten(n)] == utils.EMPTY: + return True + + self.game.board[self.game._flatten(vertex)] = color + for n in nei: + if self.game.board[self.game._flatten(n)] == utils.another_color(color): + can_kill, block = self._find_block(n) + if can_kill: + self.game.board[self.game._flatten(vertex)] = utils.EMPTY + return True + + ### can not suicide + can_kill, block = self._find_block(vertex) + if can_kill: + self.game.board[self.game._flatten(vertex)] = utils.EMPTY + return False + + self.game.board[self.game._flatten(vertex)] = utils.EMPTY + return True + + def _check_global_isomorphous(self, color, vertex): + ##backup + _board = copy.copy(self.game.board) + self.game.board[self.game._flatten(vertex)] = color + self._process_board(color, vertex) + if self.game.board in self.game.history: + res = True + else: + res = False + + self.game.board = _board + return res + + def _in_board(self, vertex): + x, y = vertex + if x < 1 or x > self.game.size: return False + if y < 1 or y > self.game.size: return False + return True + + def _neighbor(self, vertex): + x, y = vertex + nei = [] + for d in DELTA: + _x = x + d[0] + _y = y + d[1] + if self._in_board((_x, _y)): + nei.append((_x, _y)) + return nei + + def _process_board(self, color, vertex): + nei = self._neighbor(vertex) + for n in nei: + if self.game.board[self.game._flatten(n)] == utils.another_color(color): + can_kill, block = self._find_block(n, alive_break=True) + if can_kill: + for b in block: + self.game.board[self.game._flatten(b)] = utils.EMPTY + + def is_valid(self, color, vertex): + ### in board + if not self._in_board(vertex): + return False + + ### already have stone + if not self.game.board[self.game._flatten(vertex)] == utils.EMPTY: + return False + + ### check if it is qi + if not self._is_qi(color, vertex): + return False + + if self._check_global_isomorphous(color, vertex): + return False + + return True + + def do_move(self, color, vertex): + if not self.is_valid(color, vertex): + return False 
+ self.game.board[self.game._flatten(vertex)] = color + self._process_board(color, vertex) + self.game.history.append(copy.copy(self.game.board)) + self.game.past.append(copy.copy(self.game.board)) + return True class Game: @@ -17,14 +140,16 @@ class Game: self.size = size self.komi = 6.5 self.board = [utils.EMPTY] * (self.size * self.size) - self.strategy = strategy(Network.forward) - self.history = deque(maxlen=8) + self.strategy = strategy() + self.executor = Executor(game=self) + self.history = [] + self.past = deque(maxlen=8) for i in range(8): - self.history.append(self.board) + self.past.append(self.board) def _flatten(self, vertex): x, y = vertex - return (x - 1) * self.size + (y - 1) + return (y - 1) * self.size + (x - 1) def clear(self): self.board = [utils.EMPTY] * (self.size * self.size) @@ -36,34 +161,42 @@ class Game: def set_komi(self, k): self.komi = k + def check_valid(self, vertex): + return True + def do_move(self, color, vertex): if vertex == utils.PASS: return True - - id_ = self._flatten(vertex) - if self.board[id_] == utils.EMPTY: - self.board[id_] = color - self.history.append(self.board) - return True - else: - return False - - def step_forward(self, state, action): - if state[0, 0, 0, -1] == 1: - color = 1 - else: - color = -1 - if action == 361: - vertex = (0, 0) - else: - vertex = (action / 19 + 1, action % 19) - self.do_move(color, vertex) - new_state = np.concatenate([state[:, :, :, 1:8], self.board == 1, state[:, :, :, 9:16], 1 - state[:, :, :, -1]], - axis=3) - return new_state, 0 + res = self.executor.do_move(color, vertex) + return res def gen_move(self, color): - move = self.strategy.gen_move(self.history, color) + # move = self.strategy.gen_move(color) + # return move + move = self.strategy.gen_move(self.past, color) + self.do_move(color, move) return move - # return utils.PASS + def status2symbol(self, s): + pool = {utils.WHITE: '#', utils.EMPTY: '.', utils.BLACK: '*', utils.FILL: 'F', utils.UNKNOWN: '?'} + return pool[s] + + def show_board(self): + row = [i for i in range(1, 20)] + col = ' abcdefghijklmnopqrstuvwxyz' + print(' ', end='') + for j in range(self.size + 1): + print(col[j], end=' ') + print('') + for i in range(self.size): + print(row[i], end=' ') + if row[i] < 10: + print(' ', end='') + for j in range(self.size): + print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ') + print('') + + +if __name__ == "__main__": + g = Game() + g.show_board() diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py index 3235cf2..99a8e4d 100644 --- a/AlphaGo/strategy.py +++ b/AlphaGo/strategy.py @@ -1,5 +1,8 @@ import numpy as np import utils +import time +import Network +import tensorflow as tf from collections import deque from tianshou.core.mcts.mcts import MCTS @@ -47,9 +50,12 @@ class GoEnv: class strategy(object): - def __init__(self, evaluator): + def __init__(self): self.simulator = GoEnv() - self.evaluator = evaluator + self.net = Network.Network() + self.sess = self.net.forward() + self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], + feed_dict={self.net.x: state, self.net.is_training: False}) def data_process(self, history, color): state = np.zeros([1, 19, 19, 17]) @@ -67,7 +73,7 @@ class strategy(object): self.simulator.board = history[-1] state = self.data_process(history, color) prior = self.evaluator(state)[0] - mcts = MCTS(self.simulator, self.evaluator, state, 362, prior, inverse=True, max_step=20) + mcts = MCTS(self.simulator, self.evaluator, state, 362, prior, inverse=True, 
max_step=100) temp = 1 p = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) choice = np.random.choice(362, 1, p=p).tolist()[0] diff --git a/AlphaGo/test.py b/AlphaGo/test.py index 1d3a3fc..59c5a26 100644 --- a/AlphaGo/test.py +++ b/AlphaGo/test.py @@ -33,112 +33,127 @@ print(res) res = e.run_cmd('7 play BLACK C3') print(res) -res = e.run_cmd('play BLACK C4') -res = e.run_cmd('play BLACK C5') -res = e.run_cmd('play BLACK C6') -res = e.run_cmd('play BLACK D3') -print(res) - -res = e.run_cmd('8 genmove BLACK') -print(res) - -<<<<<<< HEAD:AlphaGo/test.py -res = e.run_cmd('9 genmove WHITE') -print(res) -======= -#g.show_board() -print(g.check_valid((10, 9))) -print(g.executor._neighbor((1,1))) -print(g.do_move(utils.WHITE, (4, 6))) -#g.show_board() +# res = e.run_cmd('play BLACK C4') +# res = e.run_cmd('play BLACK C5') +# res = e.run_cmd('play BLACK C6') +# res = e.run_cmd('play BLACK D3') +# print(res) -res = e.run_cmd('play BLACK L10') -res = e.run_cmd('play BLACK L11') -res = e.run_cmd('play BLACK L12') -res = e.run_cmd('play BLACK L13') -res = e.run_cmd('play BLACK L14') -res = e.run_cmd('play BLACK m15') -res = e.run_cmd('play BLACK m9') -res = e.run_cmd('play BLACK C9') -res = e.run_cmd('play BLACK D9') -res = e.run_cmd('play BLACK E9') -res = e.run_cmd('play BLACK F9') -res = e.run_cmd('play BLACK G9') -res = e.run_cmd('play BLACK H9') -res = e.run_cmd('play BLACK I9') - -res = e.run_cmd('play BLACK N9') -res = e.run_cmd('play BLACK N15') -res = e.run_cmd('play BLACK O10') -res = e.run_cmd('play BLACK O11') -res = e.run_cmd('play BLACK O12') -res = e.run_cmd('play BLACK O13') -res = e.run_cmd('play BLACK O14') -res = e.run_cmd('play BLACK M12') - -res = e.run_cmd('play WHITE M10') -res = e.run_cmd('play WHITE M11') -res = e.run_cmd('play WHITE N10') -res = e.run_cmd('play WHITE N11') - -res = e.run_cmd('play WHITE M13') -res = e.run_cmd('play WHITE M14') -res = e.run_cmd('play WHITE N13') -res = e.run_cmd('play WHITE N14') -print(res) - -res = e.run_cmd('play BLACK N12') -print(res) -#g.show_board() - -res = e.run_cmd('play BLACK P16') -res = e.run_cmd('play BLACK P17') -res = e.run_cmd('play BLACK P18') -res = e.run_cmd('play BLACK P19') -res = e.run_cmd('play BLACK Q16') -res = e.run_cmd('play BLACK R16') -res = e.run_cmd('play BLACK S16') - -res = e.run_cmd('play WHITE S18') -res = e.run_cmd('play WHITE S17') -res = e.run_cmd('play WHITE Q19') -res = e.run_cmd('play WHITE Q18') -res = e.run_cmd('play WHITE Q17') -res = e.run_cmd('play WHITE R18') -res = e.run_cmd('play WHITE R17') -res = e.run_cmd('play BLACK S19') -print(res) -#g.show_board() - -res = e.run_cmd('play WHITE R19') -g.show_board() - -res = e.run_cmd('play BLACK S19') +res = e.run_cmd('8 genmove WHITE') print(res) g.show_board() -res = e.run_cmd('play BLACK S19') -print(res) +# res = e.run_cmd('8 genmove BLACK') +# print(res) +# g.show_board() +# +# res = e.run_cmd('8 genmove WHITE') +# print(res) +# g.show_board() +# +# res = e.run_cmd('8 genmove BLACK') +# print(res) +# g.show_board() +# +# res = e.run_cmd('8 genmove WHITE') +# print(res) +# g.show_board() +# #g.show_board() +# print(g.check_valid((10, 9))) +# print(g.executor._neighbor((1,1))) +# print(g.do_move(utils.WHITE, (4, 6))) +# #g.show_board() +# +# +# res = e.run_cmd('play BLACK L10') +# res = e.run_cmd('play BLACK L11') +# res = e.run_cmd('play BLACK L12') +# res = e.run_cmd('play BLACK L13') +# res = e.run_cmd('play BLACK L14') +# res = e.run_cmd('play BLACK m15') +# res = e.run_cmd('play BLACK m9') +# res = e.run_cmd('play BLACK C9') +# res = 
e.run_cmd('play BLACK D9') +# res = e.run_cmd('play BLACK E9') +# res = e.run_cmd('play BLACK F9') +# res = e.run_cmd('play BLACK G9') +# res = e.run_cmd('play BLACK H9') +# res = e.run_cmd('play BLACK I9') +# +# res = e.run_cmd('play BLACK N9') +# res = e.run_cmd('play BLACK N15') +# res = e.run_cmd('play BLACK O10') +# res = e.run_cmd('play BLACK O11') +# res = e.run_cmd('play BLACK O12') +# res = e.run_cmd('play BLACK O13') +# res = e.run_cmd('play BLACK O14') +# res = e.run_cmd('play BLACK M12') +# +# res = e.run_cmd('play WHITE M10') +# res = e.run_cmd('play WHITE M11') +# res = e.run_cmd('play WHITE N10') +# res = e.run_cmd('play WHITE N11') +# +# res = e.run_cmd('play WHITE M13') +# res = e.run_cmd('play WHITE M14') +# res = e.run_cmd('play WHITE N13') +# res = e.run_cmd('play WHITE N14') +# print(res) +# +# res = e.run_cmd('play BLACK N12') +# print(res) +# #g.show_board() +# +# res = e.run_cmd('play BLACK P16') +# res = e.run_cmd('play BLACK P17') +# res = e.run_cmd('play BLACK P18') +# res = e.run_cmd('play BLACK P19') +# res = e.run_cmd('play BLACK Q16') +# res = e.run_cmd('play BLACK R16') +# res = e.run_cmd('play BLACK S16') +# +# res = e.run_cmd('play WHITE S18') +# res = e.run_cmd('play WHITE S17') +# res = e.run_cmd('play WHITE Q19') +# res = e.run_cmd('play WHITE Q18') +# res = e.run_cmd('play WHITE Q17') +# res = e.run_cmd('play WHITE R18') +# res = e.run_cmd('play WHITE R17') +# res = e.run_cmd('play BLACK S19') +# print(res) +# #g.show_board() +# +# res = e.run_cmd('play WHITE R19') +# g.show_board() +# +# res = e.run_cmd('play BLACK S19') +# print(res) +# g.show_board() +# +# res = e.run_cmd('play BLACK S19') +# print(res) +# +# +# res = e.run_cmd('play BLACK E17') +# res = e.run_cmd('play BLACK F16') +# res = e.run_cmd('play BLACK F18') +# res = e.run_cmd('play BLACK G17') +# res = e.run_cmd('play WHITE G16') +# res = e.run_cmd('play WHITE G18') +# res = e.run_cmd('play WHITE H17') +# g.show_board() +# +# res = e.run_cmd('play WHITE F17') +# g.show_board() +# +# res = e.run_cmd('play BLACK G17') +# print(res) +# g.show_board() +# +# res = e.run_cmd('play BLACK G19') +# res = e.run_cmd('play BLACK G17') +# g.show_board() -res = e.run_cmd('play BLACK E17') -res = e.run_cmd('play BLACK F16') -res = e.run_cmd('play BLACK F18') -res = e.run_cmd('play BLACK G17') -res = e.run_cmd('play WHITE G16') -res = e.run_cmd('play WHITE G18') -res = e.run_cmd('play WHITE H17') -g.show_board() -res = e.run_cmd('play WHITE F17') -g.show_board() - -res = e.run_cmd('play BLACK G17') -print(res) -g.show_board() - -res = e.run_cmd('play BLACK G19') -res = e.run_cmd('play BLACK G17') -g.show_board() ->>>>>>> gtp:GTP/test.py diff --git a/AlphaGo/utils.py b/AlphaGo/utils.py index 8075381..5437950 100644 --- a/AlphaGo/utils.py +++ b/AlphaGo/utils.py @@ -1,137 +1,20 @@ # -*- coding: utf-8 -*- # vim:fenc=utf-8 # $File: utils.py -# $Date: Fri Nov 17 10:2407 2017 +0800 +# $Date: Mon Nov 27 18:2755 2017 +0800 # $Author: renyong15 © # WHITE = -1 -BLACK = +1 EMPTY = 0 +BLACK = +1 +FILL = +2 +KO = +3 +UNKNOWN = +4 -PASS = (0, 0) +PASS = (0,0) RESIGN = "resign" -from collections import defaultdict -import functools -import itertools -import operator -import random -import re -import time -import gtp -import go - -KGS_COLUMNS = 'ABCDEFGHJKLMNOPQRST' -SGF_COLUMNS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" - - -def parse_sgf_to_flat(sgf): - return flatten_coords(parse_sgf_coords(sgf)) - - -def flatten_coords(c): - return go.N * c[0] + c[1] - - -def unflatten_coords(f): - return divmod(f, 
go.N) - - -def parse_sgf_coords(s): - 'Interprets coords. aa is top left corner; sa is top right corner' - if s is None or s == '': - return None - return SGF_COLUMNS.index(s[1]), SGF_COLUMNS.index(s[0]) - - -def unparse_sgf_coords(c): - if c is None: - return '' - return SGF_COLUMNS[c[1]] + SGF_COLUMNS[c[0]] - - -def parse_kgs_coords(s): - 'Interprets coords. A1 is bottom left; A9 is top left.' - if s == 'pass': - return None - s = s.upper() - col = KGS_COLUMNS.index(s[0]) - row_from_bottom = int(s[1:]) - 1 - return go.N - row_from_bottom - 1, col - - -def parse_pygtp_coords(vertex): - 'Interprets coords. (1, 1) is bottom left; (1, 9) is top left.' - if vertex in (gtp.PASS, gtp.RESIGN): - return None - return go.N - vertex[1], vertex[0] - 1 - - -def unparse_pygtp_coords(c): - if c is None: - return gtp.PASS - return c[1] + 1, go.N - c[0] - - -def parse_game_result(result): - if re.match(r'[bB]\+', result): - return go.BLACK - elif re.match(r'[wW]\+', result): - return go.WHITE - else: - return None - - -def product(numbers): - return functools.reduce(operator.mul, numbers) - - -def take_n(n, iterable): - return list(itertools.islice(iterable, n)) - - -def iter_chunks(chunk_size, iterator): - while True: - next_chunk = take_n(chunk_size, iterator) - # If len(iterable) % chunk_size == 0, don't return an empty chunk. - if next_chunk: - yield next_chunk - else: - break - - -def shuffler(iterator, pool_size=10 ** 5, refill_threshold=0.9): - yields_between_refills = round(pool_size * (1 - refill_threshold)) - # initialize pool; this step may or may not exhaust the iterator. - pool = take_n(pool_size, iterator) - while True: - random.shuffle(pool) - for i in range(yields_between_refills): - yield pool.pop() - next_batch = take_n(yields_between_refills, iterator) - if not next_batch: - break - pool.extend(next_batch) - # finish consuming whatever's left - no need for further randomization. 
- yield pool - - -class timer(object): - all_times = defaultdict(float) - - def __init__(self, label): - self.label = label - - def __enter__(self): - self.tick = time.time() - - def __exit__(self, type, value, traceback): - self.tock = time.time() - self.all_times[self.label] += self.tock - self.tick - - @classmethod - def print_times(cls): - for k, v in cls.all_times.items(): - print("%s: %.3f" % (k, v)) +def another_color(color): + return color * -1 diff --git a/GTP/game.py b/GTP/game.py index 76b0750..2d76a7d 100644 --- a/GTP/game.py +++ b/GTP/game.py @@ -4,7 +4,7 @@ # $Date: Tue Nov 28 14:4726 2017 +0800 # $Author: renyong15 © # - +from __future__ import print_function import utils import copy @@ -173,15 +173,20 @@ class Game: def show_board(self): row = [i for i in range(1, 20)] col = ' abcdefghijklmnopqrstuvwxyz' - - for i in range(self.size): - print(row[i]) - if row[i] < 10: - print(' ') - for j in range(self.size): - print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))])) - print('\n') - print(' ') + print(' ', end='') for j in range(self.size + 1): - print(col[j]) + print(col[j], end=' ') + print('\n') + for i in range(self.size): + print(row[i], end=' ') + if row[i] < 10: + print(' ', end='') + for j in range(self.size): + print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ') + print('\n') + + +if __name__=="__main__": + g = Game() + g.show_board() \ No newline at end of file diff --git a/GTP/utils.py b/GTP/utils.py index b7ce00c..c4114a3 100644 --- a/GTP/utils.py +++ b/GTP/utils.py @@ -17,6 +17,3 @@ RESIGN = "resign" def another_color(color): return color * -1 - - -
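
Usage sketch (not part of the patch): a minimal example of how the refactored Game class added in AlphaGo/game.py appears intended to be driven, pieced together from the methods the patch introduces (do_move, gen_move, show_board, the Executor validity checks) and the constants in AlphaGo/utils.py. The import path, the default 19x19 constructor, and the availability of a trained checkpoint for strategy()/Network.forward() are assumptions; gen_move will raise "No model loaded" if no checkpoint exists under the hard-coded path.

    # hypothetical driver script, assuming it lives next to AlphaGo/game.py
    import utils            # provides BLACK=+1, WHITE=-1, PASS=(0, 0)
    from game import Game   # the Game class added by this patch

    game = Game()           # the patch's __main__ block constructs it with defaults
    game.set_komi(6.5)

    # Place a stone directly. Vertices are (x, y) with (1, 1) the upper-left
    # corner, per the comment in game.py; Executor.is_valid enforces the
    # liberty (qi) and global-isomorphism checks introduced by this patch.
    game.do_move(utils.BLACK, (3, 3))

    # Ask the MCTS-backed strategy for WHITE's reply. Note that gen_move both
    # returns the chosen vertex and plays it on the board, and it depends on
    # strategy() having loaded a Network checkpoint from disk.
    move = game.gen_move(utils.WHITE)

    game.show_board()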