diff --git a/.gitignore b/.gitignore index 36d134c..d697b92 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ checkpoints checkpoints_origin *.json .DS_Store +data diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py deleted file mode 100644 index caf7710..0000000 --- a/AlphaGo/Network.py +++ /dev/null @@ -1,211 +0,0 @@ -import os -import time -import sys - -import numpy as np -import time -import tensorflow as tf -import tensorflow.contrib.layers as layers - -import multi_gpu -import time - -# os.environ["CUDA_VISIBLE_DEVICES"] = "1" -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - - -def residual_block(input, is_training): - normalizer_params = {'is_training': is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS} - h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = h + input - return tf.nn.relu(h) - - -def policy_heads(input, is_training): - normalizer_params = {'is_training': is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS} - h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.flatten(h) - h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4)) - return h - - -def value_heads(input, is_training): - normalizer_params = {'is_training': is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS} - h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - 
weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.flatten(h) - h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4)) - return h - - -class Network(object): - def __init__(self): - self.x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) - self.is_training = tf.placeholder(tf.bool, shape=[]) - self.z = tf.placeholder(tf.float32, shape=[None, 1]) - self.pi = tf.placeholder(tf.float32, shape=[None, 362]) - self.build_network() - - def build_network(self): - h = layers.conv2d(self.x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, - normalizer_params={'is_training': self.is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS}, - weights_regularizer=layers.l2_regularizer(1e-4)) - for i in range(19): - h = residual_block(h, self.is_training) - self.v = value_heads(h, self.is_training) - self.p = policy_heads(h, self.is_training) - # loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) - self.value_loss = tf.reduce_mean(tf.square(self.z - self.v)) - self.policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.pi, logits=self.p)) - - self.reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - self.total_loss = self.value_loss + self.policy_loss + self.reg - # train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) - self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - with tf.control_dependencies(self.update_ops): - self.train_op = tf.train.RMSPropOptimizer(1e-4).minimize(self.total_loss) - self.var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) - self.saver = tf.train.Saver(max_to_keep=10, var_list=self.var_list) - - def train(self): - data_path = 
"/home/tongzheng/data/" - data_name = os.listdir("/home/tongzheng/data/") - epochs = 100 - batch_size = 128 - - result_path = "./checkpoints/" - with multi_gpu.create_session() as sess: - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(result_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - self.saver.restore(sess, ckpt_file) - for epoch in range(epochs): - for name in data_name: - data = np.load(data_path + name) - boards = data["boards"] - wins = data["wins"] - ps = data["ps"] - print (boards.shape) - print (wins.shape) - print (ps.shape) - batch_num = boards.shape[0] // batch_size - index = np.arange(boards.shape[0]) - np.random.shuffle(index) - value_losses = [] - policy_losses = [] - regs = [] - time_train = -time.time() - for iter in range(batch_num): - lv, lp, r, value, prob, _ = sess.run( - [self.value_loss, self.policy_loss, self.reg, self.v, tf.nn.softmax(p), self.train_op], - feed_dict={self.x: boards[ - index[iter * batch_size:(iter + 1) * batch_size]], - self.z: wins[index[ - iter * batch_size:(iter + 1) * batch_size]], - self.pi: ps[index[ - iter * batch_size:(iter + 1) * batch_size]], - self.is_training: True}) - value_losses.append(lv) - policy_losses.append(lp) - regs.append(r) - if iter % 1 == 0: - print( - "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( - epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), - np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) - time_train = -time.time() - value_losses = [] - policy_losses = [] - regs = [] - if iter % 20 == 0: - save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) - self.saver.save(sess, result_path + save_path) - del data, boards, wins, ps - - - # def forward(call_number): - # # checkpoint_path = "/home/yama/rl/tianshou/AlphaGo/checkpoints" - # checkpoint_path = "/home/jialian/stuGo/tianshou/stuGo/checkpoints/" - # 
board_file = np.genfromtxt("/home/jialian/stuGo/tianshou/leela-zero/src/mcts_nn_files/board_" + call_number, - # dtype='str'); - # human_board = np.zeros((17, 19, 19)) - # - # # TODO : is it ok to ignore the last channel? - # for i in range(17): - # human_board[i] = np.array(list(board_file[i])).reshape(19, 19) - # # print("============================") - # # print("human board sum : " + str(np.sum(human_board[-1]))) - # # print("============================") - # # print(human_board) - # # print("============================") - # # rint(human_board) - # feed_board = human_board.transpose(1, 2, 0).reshape(1, 19, 19, 17) - # # print(feed_board[:,:,:,-1]) - # # print(feed_board.shape) - # - # # npz_board = np.load("/home/yama/rl/tianshou/AlphaGo/data/7f83928932f64a79bc1efdea268698ae.npz") - # # print(npz_board["boards"].shape) - # # feed_board = npz_board["boards"][10].reshape(-1, 19, 19, 17) - # ##print(feed_board) - # # show_board = feed_board[0].transpose(2, 0, 1) - # # print("board shape : ", show_board.shape) - # # print(show_board) - # - # itflag = False - # with multi_gpu.create_session() as sess: - # sess.run(tf.global_variables_initializer()) - # ckpt_file = tf.train.latest_checkpoint(checkpoint_path) - # if ckpt_file is not None: - # # print('Restoring model from {}...'.format(ckpt_file)) - # saver.restore(sess, ckpt_file) - # else: - # raise ValueError("No model loaded") - # res = sess.run([tf.nn.softmax(p), v], feed_dict={x: feed_board, is_training: itflag}) - # # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) - # # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) - # # print(np.argmax(res[0])) - # np.savetxt(sys.stdout, res[0][0], fmt="%.6f", newline=" ") - # np.savetxt(sys.stdout, res[1][0], fmt="%.6f", newline=" ") - # pv_file = "/home/jialian/stuGotianshou/leela-zero/src/mcts_nn_files/policy_value" - # 
np.savetxt(pv_file, np.concatenate((res[0][0], res[1][0])), fmt="%.6f", newline=" ") - # # np.savetxt(pv_file, res[1][0], fmt="%.6f", newline=" ") - # return res - - def forward(self): - checkpoint_path = "/home/tongzheng/tianshou/AlphaGo/checkpoints/" - sess = multi_gpu.create_session() - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(checkpoint_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - self.saver.restore(sess, ckpt_file) - print('Successfully loaded') - else: - raise ValueError("No model loaded") - # prior, value = sess.run([tf.nn.softmax(p), v], feed_dict={x: state, is_training: False}) - # return prior, value - return sess - - -if __name__ == '__main__': - state = np.random.randint(0, 1, [1, 19, 19, 17]) - net = Network() - sess = net.forward() - start = time.time() - for i in range(100): - sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False}) - print("Step {}, Cumulative time {}".format(i, time.time() - start)) diff --git a/AlphaGo/Network_ori.py b/AlphaGo/Network_ori.py deleted file mode 100644 index 9d33bb9..0000000 --- a/AlphaGo/Network_ori.py +++ /dev/null @@ -1,175 +0,0 @@ -import os -import time -import gc - -import numpy as np -import tensorflow as tf -import tensorflow.contrib.layers as layers - -import multi_gpu - -os.environ["CUDA_VISIBLE_DEVICES"] = "1" - - -def residual_block(input, is_training): - normalizer_params = {'is_training': is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS} - h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = h + input - return 
tf.nn.relu(h) - - -def policy_heads(input, is_training): - normalizer_params = {'is_training': is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS} - h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.flatten(h) - h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4)) - return h - - -def value_heads(input, is_training): - normalizer_params = {'is_training': is_training, - 'updates_collections': tf.GraphKeys.UPDATE_OPS} - h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.flatten(h) - h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4)) - h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4)) - return h - - -x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) -is_training = tf.placeholder(tf.bool, shape=[]) -z = tf.placeholder(tf.float32, shape=[None, 1]) -pi = tf.placeholder(tf.float32, shape=[None, 362]) - -h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, - normalizer_params={'is_training': is_training, 'updates_collections': tf.GraphKeys.UPDATE_OPS}, - weights_regularizer=layers.l2_regularizer(1e-4)) -for i in range(19): - h = residual_block(h, is_training) -v = value_heads(h, is_training) -p = policy_heads(h, is_training) -# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) -value_loss = tf.reduce_mean(tf.square(z - v)) -policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p)) - 
-reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) -total_loss = value_loss + policy_loss + reg -# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) -update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) -with tf.control_dependencies(update_ops): - train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss) -var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) -saver = tf.train.Saver(max_to_keep=10, var_list=var_list) - - -def train(): - data_path = "/home/tongzheng/data/" - data_name = os.listdir("/home/tongzheng/data/") - epochs = 100 - batch_size = 128 - - result_path = "./checkpoints/" - with multi_gpu.create_session() as sess: - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(result_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) - for epoch in range(epochs): - for name in data_name: - data = np.load(data_path + name) - boards = data["boards"] - wins = data["wins"] - ps = data["ps"] - print (boards.shape) - print (wins.shape) - print (ps.shape) - # batch_num = 1 - batch_num = boards.shape[0] // batch_size - index = np.arange(boards.shape[0]) - np.random.shuffle(index) - value_losses = [] - policy_losses = [] - regs = [] - time_train = -time.time() - for iter in range(batch_num): - lv, lp, r, _ = sess.run([value_loss, policy_loss, reg, train_op], - feed_dict={x: boards[ - index[iter * batch_size:(iter + 1) * batch_size]], - z: wins[index[ - iter * batch_size:(iter + 1) * batch_size]], - pi: ps[index[ - iter * batch_size:(iter + 1) * batch_size]], - is_training: True}) - value_losses.append(lv) - policy_losses.append(lp) - regs.append(r) - del lv, lp, r - if iter % 1 == 0: - print( - "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( - epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), - 
np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) - del value_losses, policy_losses, regs, time_train - time_train = -time.time() - value_losses = [] - policy_losses = [] - regs = [] - if iter % 20 == 0: - save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) - saver.save(sess, result_path + save_path) - del save_path - del data, boards, wins, ps, batch_num, index - gc.collect() - - -def forward(board): - result_path = "./checkpoints" - itflag = False - res = None - if board is None: - # data = np.load("/home/tongzheng/meta-data/80b7bf21bce14862806d48c3cd760a1b.npz") - data = np.load("./data/7f83928932f64a79bc1efdea268698ae.npz") - board = data["boards"][50].reshape(-1, 19, 19, 17) - human_board = board[0].transpose(2, 0, 1) - print("============================") - print("human board sum : " + str(np.sum(human_board))) - print("============================") - print(board[:, :, :, -1]) - itflag = False - with multi_gpu.create_session() as sess: - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(result_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) - else: - raise ValueError("No model loaded") - res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag}) - # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) - # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) - # print(np.argmax(res[0])) - print(res) - print(data["p"][0]) - print(np.argmax(res[0])) - print(np.argmax(data["p"][0])) - # print(res[0].tolist()[0]) - # print(np.argmax(res[0])) - return res - - -if __name__ == '__main__': - # train() - # if sys.argv[1] == "test": - forward(None) diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py index 1f9af85..8b54470 100644 --- a/AlphaGo/engine.py +++ b/AlphaGo/engine.py @@ 
-167,7 +167,7 @@ class GTPEngine(): move = self._parse_move(args) if move: color, vertex = move - res = self._game.do_move(color, vertex) + res = self._game.play_move(color, vertex) if res: return None, True else: @@ -177,17 +177,21 @@ class GTPEngine(): def cmd_genmove(self, args, **kwargs): color = self._parse_color(args) if color: - move = self._game.gen_move(color) + move = self._game.think_play_move(color) return self._vertex_point2string(move), True else: return 'unknown player', False def cmd_get_score(self, args, **kwargs): - return self._game.executor.get_score(), None + return self._game.game_engine.executor_get_score(self._game.board, True), True def cmd_show_board(self, args, **kwargs): return self._game.board, True + def cmd_get_prob(self, args, **kwargs): + return self._game.prob, True + + if __name__ == "main": game = Game() engine = GTPEngine(game_obj=Game) diff --git a/AlphaGo/game.py b/AlphaGo/game.py index 2a82d8e..8706572 100644 --- a/AlphaGo/game.py +++ b/AlphaGo/game.py @@ -9,16 +9,13 @@ import utils import copy import tensorflow as tf import numpy as np -import sys +import sys, os import go -import network_small -import strategy +import model from collections import deque +sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir)) from tianshou.core.mcts.mcts import MCTS -import Network -#from strategy import strategy - class Game: ''' Load the real game and trained weights. @@ -26,7 +23,7 @@ class Game: TODO : Maybe merge with the engine class in future, currently leave it untouched for interacting with Go UI. 
''' - def __init__(self, size=9, komi=6.5, checkpoint_path=None): + def __init__(self, size=9, komi=3.75, checkpoint_path=None): self.size = size self.komi = komi self.board = [utils.EMPTY] * (self.size ** 2) @@ -34,24 +31,10 @@ class Game: self.latest_boards = deque(maxlen=8) for _ in range(8): self.latest_boards.append(self.board) - - self.executor = go.Go(game=self) - #self.strategy = strategy(checkpoint_path) - - self.simulator = strategy.GoEnv(game=self) - self.net = network_small.Network() - self.sess = self.net.forward(checkpoint_path) - self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], - feed_dict={self.net.x: state, self.net.is_training: False}) - - def _flatten(self, vertex): - x, y = vertex - return (x - 1) * self.size + (y - 1) - - def _deflatten(self, idx): - x = idx // self.size + 1 - y = idx % self.size + 1 - return (x, y) + self.evaluator = model.ResNet(self.size, self.size**2 + 1, history_length=8) + # self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v], + # feed_dict={self.net.x: state, self.net.is_training: False}) + self.game_engine = go.Go(size=self.size, komi=self.komi) def clear(self): self.board = [utils.EMPTY] * (self.size ** 2) @@ -66,42 +49,30 @@ class Game: def set_komi(self, k): self.komi = k - def generate_nn_input(self, latest_boards, color): - state = np.zeros([1, self.size, self.size, 17]) - for i in range(8): - state[0, :, :, i] = np.array(np.array(latest_boards[i]) == np.ones(self.size ** 2)).reshape(self.size, self.size) - state[0, :, :, i + 8] = np.array(np.array(latest_boards[i]) == -np.ones(self.size ** 2)).reshape(self.size, self.size) - if color == utils.BLACK: - state[0, :, :, 16] = np.ones([self.size, self.size]) - if color == utils.WHITE: - state[0, :, :, 16] = np.zeros([self.size, self.size]) - return state - - def strategy_gen_move(self, latest_boards, color): - self.simulator.simulate_latest_boards = copy.copy(latest_boards) - 
self.simulator.simulate_board = copy.copy(latest_boards[-1]) - nn_input = self.generate_nn_input(self.simulator.simulate_latest_boards, color) - mcts = MCTS(self.simulator, self.evaluator, nn_input, self.size ** 2 + 1, inverse=True, max_step=1) + def think(self, latest_boards, color): + mcts = MCTS(self.game_engine, self.evaluator, [latest_boards, color], self.size ** 2 + 1, inverse=True) + mcts.search(max_step=20) temp = 1 prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) choice = np.random.choice(self.size ** 2 + 1, 1, p=prob).tolist()[0] if choice == self.size ** 2: move = utils.PASS else: - move = self._deflatten(choice) + move = self.game_engine._deflatten(choice) return move, prob - def do_move(self, color, vertex): + def play_move(self, color, vertex): + # this function can be called directly to play the opponent's move if vertex == utils.PASS: return True - res = self.executor.do_move(color, vertex) + res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex) return res - def gen_move(self, color): - # move = self.strategy.gen_move(color) - # return move - move, self.prob = self.strategy_gen_move(self.latest_boards, color) - self.do_move(color, move) + def think_play_move(self, color): + # although we don't need to return self.prob, however it is needed for neural network training + move, self.prob = self.think(self.latest_boards, color) + # play the move immediately + self.play_move(color, move) return move def status2symbol(self, s): @@ -127,6 +98,7 @@ class Game: if __name__ == "__main__": g = Game() g.show_board() + g.think_play_move(1) #file = open("debug.txt", "a") #file.write("mcts check\n") #file.close() diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 7b1d3e7..9b7e21f 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -1,7 +1,7 @@ from __future__ import print_function import utils import copy -import sys +import numpy as np from collections import deque ''' @@ -12,83 +12,26 @@ Settings of the Go game. 
''' NEIGHBOR_OFFSET = [[1, 0], [-1, 0], [0, -1], [0, 1]] +CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]] class Go: def __init__(self, **kwargs): - self.game = kwargs['game'] + self.size = kwargs['size'] + self.komi = kwargs['komi'] - def _bfs(self, vertex, color, block, status): - block.append(vertex) - status[self.game._flatten(vertex)] = True - nei = self._neighbor(vertex) - for n in nei: - if not status[self.game._flatten(n)]: - if self.game.board[self.game._flatten(n)] == color: - self._bfs(n, color, block, status) + def _flatten(self, vertex): + x, y = vertex + return (x - 1) * self.size + (y - 1) - def _find_block(self, vertex): - block = [] - status = [False] * (self.game.size ** 2) - color = self.game.board[self.game._flatten(vertex)] - self._bfs(vertex, color, block, status) - - for b in block: - for n in self._neighbor(b): - if self.game.board[self.game._flatten(n)] == utils.EMPTY: - return False, block - return True, block - - def _find_boarder(self, vertex): - block = [] - status = [False] * (self.game.size ** 2) - self._bfs(vertex, utils.EMPTY, block, status) - border = [] - for b in block: - for n in self._neighbor(b): - if not (n in block): - border.append(n) - return border - - def _is_qi(self, color, vertex): - nei = self._neighbor(vertex) - for n in nei: - if self.game.board[self.game._flatten(n)] == utils.EMPTY: - return True - - self.game.board[self.game._flatten(vertex)] = color - for n in nei: - if self.game.board[self.game._flatten(n)] == utils.another_color(color): - can_kill, block = self._find_block(n) - if can_kill: - self.game.board[self.game._flatten(vertex)] = utils.EMPTY - return True - - ### can not suicide - can_kill, block = self._find_block(vertex) - if can_kill: - self.game.board[self.game._flatten(vertex)] = utils.EMPTY - return False - - self.game.board[self.game._flatten(vertex)] = utils.EMPTY - return True - - def _check_global_isomorphous(self, color, vertex): - ##backup - _board = copy.copy(self.game.board) - 
self.game.board[self.game._flatten(vertex)] = color - self._process_board(color, vertex) - if self.game.board in self.game.history: - res = True - else: - res = False - - self.game.board = _board - return res + def _deflatten(self, idx): + x = idx // self.size + 1 + y = idx % self.size + 1 + return (x, y) def _in_board(self, vertex): x, y = vertex - if x < 1 or x > self.game.size: return False - if y < 1 or y > self.game.size: return False + if x < 1 or x > self.size: return False + if y < 1 or y > self.size: return False return True def _neighbor(self, vertex): @@ -101,45 +44,201 @@ class Go: nei.append((_x, _y)) return nei - def _process_board(self, color, vertex): + def _corner(self, vertex): + x, y = vertex + corner = [] + for d in CORNER_OFFSET: + _x = x + d[0] + _y = y + d[1] + if self._in_board((_x, _y)): + corner.append((_x, _y)) + return corner + + def _find_group(self, current_board, vertex): + color = current_board[self._flatten(vertex)] + # print ("color : ", color) + chain = set() + frontier = [vertex] + has_liberty = False + while frontier: + current = frontier.pop() + # print ("current : ", current) + chain.add(current) + for n in self._neighbor(current): + if current_board[self._flatten(n)] == color and not n in chain: + frontier.append(n) + if current_board[self._flatten(n)] == utils.EMPTY: + has_liberty = True + return has_liberty, chain + + def _is_suicide(self, current_board, color, vertex): + current_board[self._flatten(vertex)] = color # assume that we already take this move + suicide = False + + has_liberty, group = self._find_group(current_board, vertex) + if not has_liberty: + suicide = True # no liberty, suicide + for n in self._neighbor(vertex): + if current_board[self._flatten(n)] == utils.another_color(color): + opponent_liberty, group = self._find_group(current_board, n) + if not opponent_liberty: + suicide = False # this move is able to take opponent's stone, not suicide + + current_board[self._flatten(vertex)] = utils.EMPTY # undo 
this move + return suicide + + def _process_board(self, current_board, color, vertex): nei = self._neighbor(vertex) for n in nei: - if self.game.board[self.game._flatten(n)] == utils.another_color(color): - can_kill, block = self._find_block(n) - if can_kill: - for b in block: - self.game.board[self.game._flatten(b)] = utils.EMPTY + if current_board[self._flatten(n)] == utils.another_color(color): + has_liberty, group = self._find_group(current_board, n) + if not has_liberty: + for b in group: + current_board[self._flatten(b)] = utils.EMPTY - def is_valid(self, color, vertex): + def _check_global_isomorphous(self, history_boards, current_board, color, vertex): + repeat = False + next_board = copy.copy(current_board) + next_board[self._flatten(vertex)] = color + self._process_board(next_board, color, vertex) + if next_board in history_boards: + repeat = True + return repeat + + def _is_eye(self, current_board, color, vertex): + nei = self._neighbor(vertex) + cor = self._corner(vertex) + ncolor = {color == current_board[self._flatten(n)] for n in nei} + if False in ncolor: + # print "not all neighbors are in same color with us" + return False + _, group = self._find_group(current_board, nei[0]) + if set(nei) < group: + # print "all neighbors are in same group and same color with us" + return True + else: + opponent_number = [current_board[self._flatten(c)] for c in cor].count(-color) + opponent_propotion = float(opponent_number) / float(len(cor)) + if opponent_propotion < 0.5: + # print "few opponents, real eye" + return True + else: + # print "many opponents, fake eye" + return False + + def _knowledge_prunning(self, current_board, color, vertex): + # forbid some stupid selfplay using human knowledge + if self._is_eye(current_board, color, vertex): + return False + # forbid position on its own eye. 
+ return True + + def _is_game_finished(self, current_board, color): + ''' + for each empty position, if it has both BLACK and WHITE neighbors, the game is still not finished + :return: return the game is finished + ''' + board = copy.deepcopy(current_board) + empty_idx = [i for i, x in enumerate(board) if x == utils.EMPTY] # find all empty idx + for idx in empty_idx: + neighbor_idx = self._neighbor(self.deflatten(idx)) + if len(neighbor_idx) > 1: + first_idx = neighbor_idx[0] + for other_idx in neighbor_idx[1:]: + if board[self.flatten(other_idx)] != board[self.flatten(first_idx)]: + return False + + return True + + def _action2vertex(self, action): + if action == self.size ** 2: + vertex = (0, 0) + else: + vertex = self._deflatten(action) + return vertex + + def _is_valid(self, history_boards, current_board, color, vertex): ### in board if not self._in_board(vertex): return False ### already have stone - if not self.game.board[self.game._flatten(vertex)] == utils.EMPTY: + if not current_board[self._flatten(vertex)] == utils.EMPTY: return False - ### check if it is qi - if not self._is_qi(color, vertex): + ### check if it is suicide + if self._is_suicide(current_board, color, vertex): return False - if self._check_global_isomorphous(color, vertex): + ### forbid global isomorphous + if self._check_global_isomorphous(history_boards, current_board, color, vertex): return False return True - def do_move(self, color, vertex): - if not self.is_valid(color, vertex): + def simulate_is_valid(self, state, action): + history_boards, color = state + vertex = self._action2vertex(action) + current_board = history_boards[-1] + + if not self._is_valid(history_boards, current_board, color, vertex): + return False + + if not self._knowledge_prunning(current_board, color, vertex): return False - self.game.board[self.game._flatten(vertex)] = color - self._process_board(color, vertex) - self.game.history.append(copy.copy(self.game.board)) - 
self.game.latest_boards.append(copy.copy(self.game.board)) return True - def _find_empty(self): - idx = [i for i,x in enumerate(self.game.board) if x == utils.EMPTY ][0] - return self.game._deflatten(idx) + def simulate_is_valid_list(self, state, action_set): + # find all the invalid actions + invalid_action_list = [] + for action_candidate in action_set[:-1]: + # go through all the actions excluding pass + if not self.simulate_is_valid(state, action_candidate): + invalid_action_list.append(action_candidate) + if len(invalid_action_list) < len(action_set) - 1: + invalid_action_list.append(action_set[-1]) + # forbid pass, if we have other choices + # TODO: In fact we should not do this. In some extreme cases, we should permit pass. + return invalid_action_list + + def _do_move(self, board, color, vertex): + if vertex == utils.PASS: + return board + else: + id_ = self._flatten(vertex) + board[id_] = color + return board + + def simulate_step_forward(self, state, action): + # initialize the simulate_board from state + history_boards, color = state + vertex = self._action2vertex(action) + new_board = self._do_move(copy.copy(history_boards[-1]), color, vertex) + history_boards.append(new_board) + new_color = -color + return [history_boards, new_color], 0 + + def executor_do_move(self, history, latest_boards, current_board, color, vertex): + if not self._is_valid(history, current_board, color, vertex): + return False + current_board[self._flatten(vertex)] = color + self._process_board(current_board, color, vertex) + history.append(copy.copy(current_board)) + latest_boards.append(copy.copy(current_board)) + return True + + def _find_empty(self, current_board): + idx = [i for i,x in enumerate(current_board) if x == utils.EMPTY ][0] + return self._deflatten(idx) + + def _find_boarder(self, current_board, vertex): + _, group = self._find_group(current_board, vertex) + border = [] + for b in group: + for n in self._neighbor(b): + if not (n in group): + border.append(n) + 
return border def _add_nearby_stones(self, neighbor_vertex_set, start_vertex_x, start_vertex_y, x_diff, y_diff, num_step): ''' @@ -159,7 +258,7 @@ class Go: start_vertex_x += x_diff start_vertex_y += y_diff - def _predict_from_nearby(self, vertex, neighbor_step = 3): + def _predict_from_nearby(self, current_board, vertex, neighbor_step=3): ''' step: the nearby 3 steps is considered :vertex: position to be estimated @@ -175,38 +274,37 @@ class Go: self._add_nearby_stones(neighbor_vertex_set, vertex[0], vertex[1] - step, -1, 1, neighbor_step) color_estimate = 0 for neighbor_vertex in neighbor_vertex_set: - color_estimate += self.game.board[self.game._flatten(neighbor_vertex)] + color_estimate += current_board[self._flatten(neighbor_vertex)] if color_estimate > 0: return utils.BLACK elif color_estimate < 0: return utils.WHITE - def get_score(self, is_unknown_estimation = False): + def executor_get_score(self, current_board, is_unknown_estimation=False): ''' is_unknown_estimation: whether use nearby stone to predict the unknown return score from BLACK perspective. 
''' - _board = copy.copy(self.game.board) - while utils.EMPTY in self.game.board: - vertex = self._find_empty() - boarder = self._find_boarder(vertex) - boarder_color = set(map(lambda v: self.game.board[self.game._flatten(v)], boarder)) + _board = copy.deepcopy(current_board) + while utils.EMPTY in _board: + vertex = self._find_empty(_board) + boarder = self._find_boarder(_board, vertex) + boarder_color = set(map(lambda v: _board[self._flatten(v)], boarder)) if boarder_color == {utils.BLACK}: - self.game.board[self.game._flatten(vertex)] = utils.BLACK + _board[self._flatten(vertex)] = utils.BLACK elif boarder_color == {utils.WHITE}: - self.game.board[self.game._flatten(vertex)] = utils.WHITE + _board[self._flatten(vertex)] = utils.WHITE elif is_unknown_estimation: - self.game.board[self.game._flatten(vertex)] = self._predict_from_nearby(vertex) + _board[self._flatten(vertex)] = self._predict_from_nearby(_board, vertex) else: - self.game.board[self.game._flatten(vertex)] =utils.UNKNOWN + _board[self._flatten(vertex)] =utils.UNKNOWN score = 0 - for i in self.game.board: + for i in _board: if i == utils.BLACK: score += 1 elif i == utils.WHITE: score -= 1 - score -= self.game.komi + score -= self.komi - self.game.board = _board return score diff --git a/AlphaGo/model.py b/AlphaGo/model.py new file mode 100644 index 0000000..41f3a47 --- /dev/null +++ b/AlphaGo/model.py @@ -0,0 +1,270 @@ +import os +import time +import sys +import cPickle +from collections import deque + +import numpy as np +import tensorflow as tf +import tensorflow.contrib.layers as layers + +import multi_gpu + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + + +def residual_block(input, is_training): + """ + one residual block + + :param input: a tensor, input of the residual block + :param is_training: a placeholder, indicate whether the model is training or not + :return: a tensor, output of the residual block + """ + normalizer_params = {'is_training': is_training, + 'updates_collections': 
tf.GraphKeys.UPDATE_OPS} + h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, + normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, + weights_regularizer=layers.l2_regularizer(1e-4)) + h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity, + normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, + weights_regularizer=layers.l2_regularizer(1e-4)) + h = h + input + return tf.nn.relu(h) + + +def policy_head(input, is_training, action_num): + """ + the head of policy branch + + :param input: a tensor, input of the policy head + :param is_training: a placeholder, indicate whether the model is training or not + :param action_num: action_num: an integer, number of unique actions at any state + :return: a tensor: output of the policy head, shape [batch_size, action_num] + """ + normalizer_params = {'is_training': is_training, + 'updates_collections': tf.GraphKeys.UPDATE_OPS} + h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu, + normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, + weights_regularizer=layers.l2_regularizer(1e-4)) + h = layers.flatten(h) + h = layers.fully_connected(h, action_num, activation_fn=tf.identity, + weights_regularizer=layers.l2_regularizer(1e-4)) + return h + + +def value_head(input, is_training): + """ + the head of value branch + + :param input: a tensor, input of the value head + :param is_training: a placeholder, indicate whether the model is training or not + :return: a tensor, output of the value head, shape [batch_size, 1] + """ + normalizer_params = {'is_training': is_training, + 'updates_collections': tf.GraphKeys.UPDATE_OPS} + h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu, + normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, + weights_regularizer=layers.l2_regularizer(1e-4)) + h = layers.flatten(h) + h = layers.fully_connected(h, 256, 
activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4)) + h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4)) + return h + + +class Data(object): + def __init__(self): + self.boards = [] + self.probs = [] + self.winner = 0 + + +class ResNet(object): + def __init__(self, board_size, action_num, history_length=1, residual_block_num=20, checkpoint_path=None): + """ + the resnet model + + :param board_size: an integer, the board size + :param action_num: an integer, number of unique actions at any state + :param history_length: an integer, the history length to use, default is 1 + :param residual_block_num: an integer, the number of residual block, default is 20, at least 1 + :param checkpoint_path: a string, the path to the checkpoint, default is None, + """ + self.board_size = board_size + self.action_num = action_num + self.history_length = history_length + self.checkpoint_path = checkpoint_path + self.x = tf.placeholder(tf.float32, shape=[None, self.board_size, self.board_size, 2 * self.history_length + 1]) + self.is_training = tf.placeholder(tf.bool, shape=[]) + self.z = tf.placeholder(tf.float32, shape=[None, 1]) + self.pi = tf.placeholder(tf.float32, shape=[None, self.action_num]) + self._build_network(residual_block_num, self.checkpoint_path) + + # training hyper-parameters: + self.window_length = 1000 + self.save_freq = 1000 + self.training_data = {'states': deque(maxlen=self.window_length), 'probs': deque(maxlen=self.window_length), + 'winner': deque(maxlen=self.window_length)} + + def _build_network(self, residual_block_num, checkpoint_path): + """ + build the network + + :param residual_block_num: an integer, the number of residual block + :param checkpoint_path: a string, the path to the checkpoint, if None, use random initialization parameter + :return: None + """ + + h = layers.conv2d(self.x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, + 
normalizer_fn=layers.batch_norm, + normalizer_params={'is_training': self.is_training, + 'updates_collections': tf.GraphKeys.UPDATE_OPS}, + weights_regularizer=layers.l2_regularizer(1e-4)) + for i in range(residual_block_num - 1): + h = residual_block(h, self.is_training) + self.v = value_head(h, self.is_training) + self.p = policy_head(h, self.is_training, self.action_num) + self.value_loss = tf.reduce_mean(tf.square(self.z - self.v)) + self.policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.pi, logits=self.p)) + + self.reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) + self.total_loss = self.value_loss + self.policy_loss + self.reg + self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(self.update_ops): + self.train_op = tf.train.AdamOptimizer(1e-4).minimize(self.total_loss) + self.var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + self.saver = tf.train.Saver(var_list=self.var_list) + self.sess = multi_gpu.create_session() + self.sess.run(tf.global_variables_initializer()) + if checkpoint_path is not None: + ckpt_file = tf.train.latest_checkpoint(checkpoint_path) + if ckpt_file is not None: + print('Restoring model from {}...'.format(ckpt_file)) + self.saver.restore(self.sess, ckpt_file) + print('Successfully loaded') + else: + raise ValueError("No model in path {}".format(checkpoint_path)) + + def __call__(self, state): + """ + + :param history: a list, the history + :param color: a string, indicate which one to play + :return: a list of tensor, the predicted value and policy given the history and color + """ + history, color = state + if len(history) != self.history_length: + raise ValueError( + 'The length of history cannot meet the need of the model, given {}, need {}'.format(len(history), + self.history_length)) + state = self._history2state(history, color) + return self.sess.run([self.p, self.v], feed_dict={self.x: state, self.is_training: False}) + + def 
_history2state(self, history, color): + """ + convert the history to the state we need + + :param history: a list, the history + :param color: a string, indicate which one to play + :return: a ndarray, the state + """ + state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1]) + for i in range(self.history_length): + state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.board_size ** 2)).reshape(self.board_size, + self.board_size) + state[0, :, :, i + self.history_length] = np.array( + np.array(history[i]) == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size) + # TODO: need a config to specify the BLACK and WHITE + if color == +1: + state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size]) + if color == -1: + state[0, :, :, 2 * self.history_length] = np.zeros([self.board_size, self.board_size]) + return state + + # TODO: design the interface between the environment and training + def train(self, mode='memory', *args, **kwargs): + if mode == 'memory': + pass + if mode == 'file': + self._train_with_file(data_path=kwargs['data_path'], batch_size=kwargs['batch_size'], + checkpoint_path=kwargs['checkpoint_path']) + + def _train_with_file(self, data_path, batch_size, checkpoint_path): + # check if the path is valid + if not os.path.exists(data_path): + raise ValueError("{} doesn't exist".format(data_path)) + self.checkpoint_path = checkpoint_path + if not os.path.exists(self.checkpoint_path): + os.mkdir(self.checkpoint_path) + + new_file_list = [] + all_file_list = [] + training_data = {} + iters = 0 + while True: + new_file_list = list(set(os.listdir(data_path)).difference(all_file_list)) + all_file_list = os.listdir(data_path) + new_file_list.sort( + key=lambda file: os.path.getmtime(data_path + file) if not os.path.isdir(data_path + file) else 0) + if new_file_list: + for file in new_file_list: + states, probs, winner = self._file_to_training_data(data_path + file) + assert 
states.shape[0] == probs.shape[0] + assert states.shape[0] == winner.shape[0] + self.training_data['states'].append(states) + self.training_data['probs'].append(probs) + self.training_data['winner'].append(winner) + training_data['states'] = np.concatenate(self.training_data['states'], axis=0) + training_data['probs'] = np.concatenate(self.training_data['probs'], axis=0) + training_data['winner'] = np.concatenate(self.training_data['winner'], axis=0) + + if len(self.training_data['states']) != self.window_length: + continue + else: + data_num = training_data['states'].shape[0] + index = np.arange(data_num) + np.random.shuffle(index) + start_time = time.time() + value_loss, policy_loss, reg, _ = self.sess.run( + [self.value_loss, self.policy_loss, self.reg, self.train_op], + feed_dict={self.x: training_data['states'][index[:batch_size]], + self.z: training_data['winner'][index[:batch_size]], + self.pi: training_data['probs'][index[:batch_size]], + self.is_training: True}) + print("Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(iters, + time.time() - start_time, + value_loss, + policy_loss, reg)) + iters += 1 + if iters % self.save_freq == 0: + save_path = "Iteration{}.ckpt".format(iters) + self.saver.save(self.sess, self.checkpoint_path + save_path) + + def _file_to_training_data(self, file_name): + with open(file_name, 'r') as file: + data = cPickle.load(file) + history = deque(maxlen=self.history_length) + states = [] + probs = [] + winner = [] + for _ in range(self.history_length): + # Note that 0 is specified, need a more general way like config + history.append([0] * self.board_size ** 2) + # Still, +1 is specified + color = +1 + + for [board, prob] in zip(data.boards, data.probs): + history.append(board) + states.append(self._history2state(history, color)) + probs.append(np.array(prob).reshape(1, self.board_size ** 2 + 1)) + winner.append(np.array(data.winner).reshape(1, 1)) + color *= -1 + states = np.concatenate(states, axis=0) + 
probs = np.concatenate(probs, axis=0) + winner = np.concatenate(winner, axis=0) + return states, probs, winner + + +if __name__=="__main__": + model = ResNet(board_size=9, action_num=82) + model.train("file", data_path="./data/", batch_size=128, checkpoint_path="./checkpoint/") \ No newline at end of file diff --git a/AlphaGo/network_small.py b/AlphaGo/network.py similarity index 100% rename from AlphaGo/network_small.py rename to AlphaGo/network.py diff --git a/AlphaGo/play.py b/AlphaGo/play.py index 7367804..a8267a7 100644 --- a/AlphaGo/play.py +++ b/AlphaGo/play.py @@ -5,6 +5,18 @@ import re import Pyro4 import time import os +import cPickle + + +class Data(object): + def __init__(self): + self.boards = [] + self.probs = [] + self.winner = 0 + + def reset(self): + self.__init__() + if __name__ == '__main__': """ @@ -13,10 +25,14 @@ if __name__ == '__main__': """ # TODO : we should set the network path in a more configurable way. parser = argparse.ArgumentParser() + parser.add_argument("--result_path", type=str, default="./data/") parser.add_argument("--black_weight_path", type=str, default=None) parser.add_argument("--white_weight_path", type=str, default=None) + parser.add_argument("--id", type=int, default=0) args = parser.parse_args() + if not os.path.exists(args.result_path): + os.mkdir(args.result_path) # black_weight_path = "./checkpoints" # white_weight_path = "./checkpoints_origin" if args.black_weight_path is not None and (not os.path.exists(args.black_weight_path)): @@ -25,24 +41,29 @@ if __name__ == '__main__': raise ValueError("Can't not find the network weights for white player.") # kill the old server - kill_old_server = subprocess.Popen(['killall', 'pyro4-ns']) - print "kill the old pyro4 name server, the return code is : " + str(kill_old_server.wait()) - time.sleep(1) + # kill_old_server = subprocess.Popen(['killall', 'pyro4-ns']) + # print "kill the old pyro4 name server, the return code is : " + str(kill_old_server.wait()) + # time.sleep(1) # 
start a name server to find the remote object - start_new_server = subprocess.Popen(['pyro4-ns', '&']) - print "Start Name Sever : " + str(start_new_server.pid) # + str(start_new_server.wait()) - time.sleep(1) + # start_new_server = subprocess.Popen(['pyro4-ns', '&']) + # print "Start Name Sever : " + str(start_new_server.pid) # + str(start_new_server.wait()) + # time.sleep(1) # start two different player with different network weights. - agent_v0 = subprocess.Popen(['python', '-u', 'player.py', '--role=black', '--checkpoint_path=' + str(args.black_weight_path)], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + black_role_name = 'black' + str(args.id) + white_role_name = 'white' + str(args.id) - agent_v1 = subprocess.Popen(['python', '-u', 'player.py', '--role=white', '--checkpoint_path=' + str(args.white_weight_path)], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + agent_v0 = subprocess.Popen( + ['python', '-u', 'player.py', '--role=' + black_role_name, '--checkpoint_path=' + str(args.black_weight_path)], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + agent_v1 = subprocess.Popen( + ['python', '-u', 'player.py', '--role=' + white_role_name, '--checkpoint_path=' + str(args.white_weight_path)], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) server_list = "" - while ("black" not in server_list) or ("white" not in server_list): + while (black_role_name not in server_list) or (white_role_name not in server_list): server_list = subprocess.check_output(['pyro4-nsc', 'list']) print "Waiting for the server start..." 
time.sleep(1) @@ -50,51 +71,82 @@ if __name__ == '__main__': print "Start black player at : " + str(agent_v0.pid) print "Start white player at : " + str(agent_v1.pid) + data = Data() player = [None] * 2 - player[0] = Pyro4.Proxy("PYRONAME:black") - player[1] = Pyro4.Proxy("PYRONAME:white") + player[0] = Pyro4.Proxy("PYRONAME:" + black_role_name) + player[1] = Pyro4.Proxy("PYRONAME:" + white_role_name) role = ["BLACK", "WHITE"] color = ['b', 'w'] pattern = "[A-Z]{1}[0-9]{1}" + space = re.compile("\s+") size = 9 show = ['.', 'X', 'O'] evaluate_rounds = 1 game_num = 0 - while game_num < evaluate_rounds: - num = 0 - pass_flag = [False, False] - print("Start game {}".format(game_num)) - # end the game if both palyer chose to pass, or play too much turns - while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2: - turn = num % 2 - move = player[turn].run_cmd(str(num) + ' genmove ' + color[turn] + '\n') - print role[turn] + " : " + str(move), - num += 1 - match = re.search(pattern, move) - if match is not None: - # print "match : " + str(match.group()) - play_or_pass = match.group() - pass_flag[turn] = False + try: + while True: + start_time = time.time() + num = 0 + pass_flag = [False, False] + print("Start game {}".format(game_num)) + # end the game if both palyer chose to pass, or play too much turns + while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2: + turn = num % 2 + board = player[turn].run_cmd(str(num) + ' show_board') + board = eval(board[board.index('['):board.index(']') + 1]) + for i in range(size): + for j in range(size): + print show[board[i * size + j]] + " ", + print "\n", + data.boards.append(board) + move = player[turn].run_cmd(str(num) + ' genmove ' + color[turn] + '\n') + print role[turn] + " : " + str(move), + num += 1 + match = re.search(pattern, move) + if match is not None: + # print "match : " + str(match.group()) + play_or_pass = match.group() + pass_flag[turn] = False + else: + # print "no match" + play_or_pass = ' 
PASS' + pass_flag[turn] = True + result = player[1 - turn].run_cmd(str(num) + ' play ' + color[turn] + ' ' + play_or_pass + '\n') + prob = player[turn].run_cmd(str(num) + ' get_prob') + prob = space.sub(',', prob[prob.index('['):prob.index(']') + 1]) + prob = prob.replace('[,', '[') + prob = prob.replace('],', ']') + prob = eval(prob) + data.probs.append(prob) + score = player[turn].run_cmd(str(num) + ' get_score') + print "Finished : ", score.split(" ")[1] + # TODO: generalize the player + if eval(score.split(" ")[1]) > 0: + data.winner = 1 + if eval(score.split(" ")[1]) < 0: + data.winner = -1 + player[0].run_cmd(str(num) + ' clear_board') + player[1].run_cmd(str(num) + ' clear_board') + file_list = os.listdir(args.result_path) + if not file_list: + data_num = 0 else: - # print "no match" - play_or_pass = ' PASS' - pass_flag[turn] = True - result = player[1 - turn].run_cmd(str(num) + ' play ' + color[turn] + ' ' + play_or_pass + '\n') - board = player[turn].run_cmd(str(num) + ' show_board') - board = eval(board[board.index('['):board.index(']') + 1]) - for i in range(size): - for j in range(size): - print show[board[i * size + j]] + " ", - print "\n", + file_list.sort(key=lambda file: os.path.getmtime(args.result_path + file) if not os.path.isdir( + args.result_path + file) else 0) + data_num = eval(file_list[-1][:-4]) + 1 + with open("./data/" + str(data_num) + ".pkl", "w") as file: + picklestring = cPickle.dump(data, file) + data.reset() + game_num += 1 - score = player[turn].run_cmd(str(num) + ' get_score') - print "Finished : ", score.split(" ")[1] - player[0].run_cmd(str(num) + ' clear_board') - player[1].run_cmd(str(num) + ' clear_board') - game_num += 1 + except Exception as e: + print(e) + subprocess.call(["kill", "-9", str(agent_v0.pid)]) + subprocess.call(["kill", "-9", str(agent_v1.pid)]) + print "Kill all player, finish all game." 
subprocess.call(["kill", "-9", str(agent_v0.pid)]) subprocess.call(["kill", "-9", str(agent_v1.pid)]) diff --git a/AlphaGo/player.py b/AlphaGo/player.py index b468cf3..0e3daff 100644 --- a/AlphaGo/player.py +++ b/AlphaGo/player.py @@ -20,6 +20,7 @@ class Player(object): #return "inside the Player of player.py" return self.engine.run_cmd(command) + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--checkpoint_path", type=str, default=None) diff --git a/AlphaGo/self-play.py b/AlphaGo/self-play.py index 98ccf84..4387b24 100644 --- a/AlphaGo/self-play.py +++ b/AlphaGo/self-play.py @@ -79,7 +79,7 @@ while True: prob.append(np.array(game.prob).reshape(-1, game.size ** 2 + 1)) print("Finished") print("\n") - score = game.executor.get_score(True) + score = game.game_engine.executor_get_score(game.board, True) if score > 0: winner = utils.BLACK else: diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py deleted file mode 100644 index 112f130..0000000 --- a/AlphaGo/strategy.py +++ /dev/null @@ -1,210 +0,0 @@ -import os, sys - -sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir)) -import numpy as np -import utils -import time -import copy -import network_small -import tensorflow as tf -from collections import deque -from tianshou.core.mcts.mcts import MCTS - -DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]] -CORNER_OFFSET = [[-1, -1], [-1, 1], [1, 1], [1, -1]] - -class GoEnv: - def __init__(self, **kwargs): - self.game = kwargs['game'] - self.simulate_board = [utils.EMPTY] * (self.game.size ** 2) - self.simulate_latest_boards = deque(maxlen=8) - - def simulate_flatten(self, vertex): - x, y = vertex - return (x - 1) * self.game.size + (y - 1) - - def simulate_deflatten(self, idx): - x = idx // self.game.size + 1 - y = idx % self.game.size + 1 - return (x, y) - - def _find_group(self, start): - color = self.simulate_board[self.simulate_flatten(start)] - # print ("color : ", color) - chain = set() - frontier = [start] - 
has_liberty = False - while frontier: - current = frontier.pop() - # print ("current : ", current) - chain.add(current) - for n in self._neighbor(current): - # print n, self._flatten(n), self.board[self._flatten(n)], - if self.simulate_board[self.simulate_flatten(n)] == color and not n in chain: - frontier.append(n) - if self.simulate_board[self.simulate_flatten(n)] == utils.EMPTY: - has_liberty = True - return has_liberty, chain - - def _is_suicide(self, color, vertex): - self.simulate_board[self.simulate_flatten(vertex)] = color # assume that we already take this move - suicide = False - - has_liberty, group = self._find_group(vertex) - if not has_liberty: - suicide = True # no liberty, suicide - for n in self._neighbor(vertex): - if self.simulate_board[self.simulate_flatten(n)] == utils.another_color(color): - opponent_liberty, group = self._find_group(n) - if not opponent_liberty: - suicide = False # this move is able to take opponent's stone, not suicide - - self.simulate_board[self.simulate_flatten(vertex)] = utils.EMPTY # undo this move - return suicide - - def _check_global_isomorphous(self, color, vertex): - ##backup - _board = copy.copy(self.simulate_board) - self.simulate_board[self.simulate_flatten(vertex)] = color - self._process_board(color, vertex) - if self.simulate_board in self.game.history: - res = True - else: - res = False - - self.simulate_board = _board - return res - - def _in_board(self, vertex): - x, y = vertex - if x < 1 or x > self.game.size: return False - if y < 1 or y > self.game.size: return False - return True - - def _neighbor(self, vertex): - x, y = vertex - nei = [] - for d in DELTA: - _x = x + d[0] - _y = y + d[1] - if self._in_board((_x, _y)): - nei.append((_x, _y)) - return nei - - def _corner(self, vertex): - x, y = vertex - corner = [] - for d in CORNER_OFFSET: - _x = x + d[0] - _y = y + d[1] - if self._in_board((_x, _y)): - corner.append((_x, _y)) - return corner - - def _process_board(self, color, vertex): - nei = 
self._neighbor(vertex) - for n in nei: - if self.simulate_board[self.simulate_flatten(n)] == utils.another_color(color): - has_liberty, group = self._find_group(n) - if not has_liberty: - for b in group: - self.simulate_board[self.simulate_flatten(b)] = utils.EMPTY - - def _is_eye(self, color, vertex): - nei = self._neighbor(vertex) - cor = self._corner(vertex) - ncolor = {color == self.simulate_board[self.simulate_flatten(n)] for n in nei} - if False in ncolor: - # print "not all neighbors are in same color with us" - return False - _, group = self._find_group(nei[0]) - if set(nei) < group: - # print "all neighbors are in same group and same color with us" - return True - else: - opponent_number = [self.simulate_board[self.simulate_flatten(c)] for c in cor].count(-color) - opponent_propotion = float(opponent_number) / float(len(cor)) - if opponent_propotion < 0.5: - # print "few opponents, real eye" - return True - else: - # print "many opponents, fake eye" - return False - - def knowledge_prunning(self, color, vertex): - ### check if it is an eye of yourself - ### assumptions : notice that this judgement requires that the state is an endgame - if self._is_eye(color, vertex): - return False - return True - - def simulate_is_valid(self, state, action): - # State is the play board, the shape is [1, self.game.size, self.game.size, 17]. 
- # Action is an index - # We need to transfer the (state, action) pair into (color, vertex) pair to simulate the move - if action == self.game.size ** 2: - vertex = (0, 0) - else: - vertex = self.simulate_deflatten(action) - if state[0, 0, 0, -1] == utils.BLACK: - color = utils.BLACK - else: - color = utils.WHITE - self.simulate_latest_boards.clear() - for i in range(8): - self.simulate_latest_boards.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist()) - self.simulate_board = copy.copy(self.simulate_latest_boards[-1]) - - ### in board - if not self._in_board(vertex): - return False - - ### already have stone - if not self.simulate_board[self.simulate_flatten(vertex)] == utils.EMPTY: - # print(np.array(self.board).reshape(9, 9)) - # print(vertex) - return False - - ### check if it is suicide - if self._is_suicide(color, vertex): - return False - - ### forbid global isomorphous - if self._check_global_isomorphous(color, vertex): - return False - - if not self.knowledge_prunning(color, vertex): - return False - - return True - - def simulate_do_move(self, color, vertex): - if vertex == utils.PASS: - return True - - id_ = self.simulate_flatten(vertex) - if self.simulate_board[id_] == utils.EMPTY: - self.simulate_board[id_] = color - return True - else: - return False - - def step_forward(self, state, action): - if state[0, 0, 0, -1] == 1: - color = utils.BLACK - else: - color = utils.WHITE - if action == self.game.size ** 2: - vertex = utils.PASS - else: - vertex = self.simulate_deflatten(action) - # print(vertex) - # print(self.board) - self.simulate_board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist() - self.simulate_do_move(color, vertex) - new_state = np.concatenate( - [state[:, :, :, 1:8], (np.array(self.simulate_board) == utils.BLACK).reshape(1, self.game.size, self.game.size, 1), - state[:, :, :, 9:16], (np.array(self.simulate_board) == utils.WHITE).reshape(1, self.game.size, self.game.size, 1), - np.array(1 - state[:, :, :, 
-1]).reshape(1, self.game.size, self.game.size, 1)], - axis=3) - return new_state, 0 diff --git a/tianshou/core/mcts/evaluator.py b/tianshou/core/mcts/evaluator.py index 9c4ee8e..a1f9456 100644 --- a/tianshou/core/mcts/evaluator.py +++ b/tianshou/core/mcts/evaluator.py @@ -19,10 +19,10 @@ class rollout_policy(evaluator): # TODO: prior for rollout policy total_reward = 0. action = np.random.randint(0, self.action_num) - state, reward = self.env.step_forward(state, action) + state, reward = self.env.simulate_step_forward(state, action) total_reward += reward while state is not None: action = np.random.randint(0, self.action_num) - state, reward = self.env.step_forward(state, action) + state, reward = self.env.simulate_step_forward(state, action) total_reward += reward return np.ones([self.action_num])/self.action_num, total_reward diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py index 979e994..8bb5f06 100644 --- a/tianshou/core/mcts/mcts.py +++ b/tianshou/core/mcts/mcts.py @@ -71,15 +71,10 @@ class UCTNode(MCTSNode): self.parent.backpropagation(self.children[action].reward) def valid_mask(self, simulator): + # let all invalid actions be illeagel in mcts if self.mask is None: - start_time = time.time() - self.mask = [] - for act in range(self.action_num - 1): - if not simulator.simulate_is_valid(self.state, act): - self.mask.append(act) - self.ucb[act] = -float("Inf") - else: - self.ucb[self.mask] = -float("Inf") + self.mask = simulator.simulate_is_valid_list(self.state, range(self.action_num)) + self.ucb[self.mask] = -float("Inf") class TSNode(MCTSNode): @@ -116,7 +111,7 @@ class ActionNode(object): self.next_state = tuple2list(self.next_state) def selection(self, simulator): - self.next_state, self.reward = simulator.step_forward(self.parent.state, self.action) + self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action) self.origin_state = self.next_state self.state_type = type(self.next_state) 
self.type_conversion_to_tuple() @@ -143,8 +138,7 @@ class ActionNode(object): class MCTS(object): - def __init__(self, simulator, evaluator, root, action_num, method="UCT", inverse=False, max_step=None, - max_time=None): + def __init__(self, simulator, evaluator, root, action_num, method="UCT", inverse=False): self.simulator = simulator self.evaluator = evaluator prior, _ = self.evaluator(root) @@ -152,33 +146,26 @@ if method == "": self.root = root if method == "UCT": - self.root = UCTNode(None, None, root, action_num, prior, inverse) + self.root = UCTNode(None, None, root, action_num, prior, inverse=inverse) if method == "TS": self.root = TSNode(None, None, root, action_num, prior, inverse=inverse) self.inverse = inverse - if max_step is not None: - self.step = 0 - self.max_step = max_step - # TODO: Optimize the stop criteria - # else: - # self.max_step = 0 - if max_time is not None: - self.start_time = time.time() - self.max_time = max_time + + def search(self, max_step=None, max_time=None): + step = 0 + start_time = time.time() + if max_step is None: + max_step = float("Inf") + if max_time is None: + max_time = float("Inf") if max_step is None and max_time is None: raise ValueError("Need a stop criteria!") - # TODO: running mcts should be implemented in another function, e.g. def search(self, max_step, max_time) - self.select_time = [] - self.evaluate_time = [] - self.bp_time = [] - while (max_step is not None and self.step < self.max_step or max_step is None) \ - and (max_time is not None and time.time() - self.start_time < self.max_time or max_time is None): - self.expand() - if max_step is not None: - self.step += 1 + while step < max_step and time.time() - start_time < max_time: + self._expand() + step += 1 - def expand(self): + def _expand(self): node, new_action = self.root.selection(self.simulator) value = node.children[new_action].expansion(self.evaluator, self.action_num) node.children[new_action].backpropagation(value + 0.)