diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py
index 98e5e61..5624a2f 100644
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@@ -6,6 +6,8 @@ #
 from game import Game
+import copy
+import numpy as np
 import utils
@@ -186,7 +188,10 @@ class GTPEngine():
         return self._game.game_engine.executor_get_score(self._game.board), True

     def cmd_show_board(self, args, **kwargs):
-        return self._game.board, True
+        board = copy.deepcopy(self._game.board)
+        if isinstance(board, np.ndarray):
+            board = board.flatten().tolist()
+        return board, True

     def cmd_get_prob(self, args, **kwargs):
         return self._game.prob, True
diff --git a/AlphaGo/game.py b/AlphaGo/game.py
index 442cb73..3a7959c 100644
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@@ -26,33 +26,37 @@ class Game:
     TODO : Maybe merge with the engine class in future, currently leave it untouched for interacting with Go UI.
     '''
-    def __init__(self, name="go", role="unknown", debug=False, checkpoint_path=None):
+    def __init__(self, name="reversi", role="unknown", debug=False, checkpoint_path=None):
         self.name = name
         self.role = role
         self.debug = debug
         if self.name == "go":
             self.size = 9
             self.komi = 3.75
-            self.board = [utils.EMPTY] * (self.size ** 2)
             self.history = []
             self.history_length = 8
-            self.latest_boards = deque(maxlen=8)
-            for _ in range(8):
-                self.latest_boards.append(self.board)
             self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role)
+            self.board = [utils.EMPTY] * (self.size ** 2)
         elif self.name == "reversi":
             self.size = 8
             self.history_length = 1
-            self.game_engine = reversi.Reversi()
+            self.history = []
+            self.game_engine = reversi.Reversi(size=self.size)
             self.board = self.game_engine.get_board()
         else:
             raise ValueError(name + " is an unknown game...")
         self.evaluator = model.ResNet(self.size, self.size ** 2 + 1, history_length=self.history_length)
+        self.latest_boards = deque(maxlen=self.history_length)
+        for _ in range(self.history_length):
+            self.latest_boards.append(self.board)

     def clear(self):
-        self.board = [utils.EMPTY] * (self.size ** 2)
-        self.history = []
+        if self.name == "go":
+            self.board = [utils.EMPTY] * (self.size ** 2)
+        elif self.name == "reversi":
+            self.board = self.game_engine.get_board()
+        self.history = []
         for _ in range(self.history_length):
             self.latest_boards.append(self.board)
@@ -84,7 +88,7 @@ class Game:
         if self.name == "go":
             res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
         elif self.name == "reversi":
-            res = self.game_engine.executor_do_move(self.board, color, vertex)
+            res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
         return res

     def think_play_move(self, color):
@@ -110,13 +114,14 @@ class Game:
             if row[i] < 10:
                 print(' ', end='')
             for j in range(self.size):
-                print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ')
+                print(self.status2symbol(self.board[self.game_engine._flatten((j + 1, i + 1))]), end=' ')
             print('')
         sys.stdout.flush()

 if __name__ == "__main__":
-    g = Game()
-    g.show_board()
+    g = Game("go")
+    print(g.board)
+    g.clear()
     g.think_play_move(1)
     #file = open("debug.txt", "a")
     #file.write("mcts check\n")
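Note: cmd_show_board now has to cope with two board representations — Go keeps a flat Python list, while the patched Reversi returns a 2-D numpy array. A minimal standalone sketch of that normalization (the helper name flatten_board is illustrative, not part of this patch):

import copy
import numpy as np

def flatten_board(board):
    # Defensive copy, so a GTP client cannot mutate engine state via the result.
    board = copy.deepcopy(board)
    # Reversi's get_board() returns a (size, size) ndarray; Go already
    # stores a flat list, so only ndarrays need flattening.
    if isinstance(board, np.ndarray):
        board = board.flatten().tolist()
    return board

assert flatten_board(np.zeros((2, 2), dtype=np.int32)) == [0, 0, 0, 0]
assert flatten_board([0, 1, -1]) == [0, 1, -1]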
diff --git a/AlphaGo/go.py b/AlphaGo/go.py
index 833b01f..aca6632 100644
--- a/AlphaGo/go.py
+++ b/AlphaGo/go.py
@@ -212,12 +212,12 @@ class Go:

     def simulate_step_forward(self, state, action):
         # initialize the simulate_board from state
-        history_boards, color = state
+        history_boards, color = copy.deepcopy(state)
         if history_boards[-1] == history_boards[-2] and action is utils.PASS:
             return None, 2 * (float(self.executor_get_score(history_boards[-1]) > 0)-0.5) * color
         else:
             vertex = self._action2vertex(action)
-            new_board = self._do_move(copy.copy(history_boards[-1]), color, vertex)
+            new_board = self._do_move(copy.deepcopy(history_boards[-1]), color, vertex)
             history_boards.append(new_board)
             new_color = -color
             return [history_boards, new_color], 0
@@ -227,8 +227,8 @@ class Go:
             return False
         current_board[self._flatten(vertex)] = color
         self._process_board(current_board, color, vertex)
-        history.append(copy.copy(current_board))
-        latest_boards.append(copy.copy(current_board))
+        history.append(copy.deepcopy(current_board))
+        latest_boards.append(copy.deepcopy(current_board))
         return True

     def _find_empty(self, current_board):
diff --git a/AlphaGo/model.py b/AlphaGo/model.py
index 2a620f9..0549f41 100644
--- a/AlphaGo/model.py
+++ b/AlphaGo/model.py
@@ -173,10 +173,10 @@ class ResNet(object):
         """
         state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1])
         for i in range(self.history_length):
-            state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.board_size ** 2)).reshape(self.board_size,
+            state[0, :, :, i] = np.array(np.array(history[i]).flatten() == np.ones(self.board_size ** 2)).reshape(self.board_size,
                                                                                                          self.board_size)
             state[0, :, :, i + self.history_length] = np.array(
-                np.array(history[i]) == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
+                np.array(history[i]).flatten() == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
         # TODO: need a config to specify the BLACK and WHITE
         if color == +1:
             state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size])
@@ -301,7 +301,7 @@ class ResNet(object):
         :return:
         """
-        new_board = copy.copy(board)
+        new_board = copy.deepcopy(board)
         if new_board.ndim == 3:
             new_board = np.expand_dims(new_board, axis=0)
@@ -331,7 +331,7 @@ class ResNet(object):
         :param orientation: an integer, which orientation to reflect
         :return:
         """
-        new_board = copy.copy(board)
+        new_board = copy.deepcopy(board)
         for _ in range(times):
             if orientation == 0:
                 new_board = new_board[:, ::-1]
diff --git a/AlphaGo/play.py b/AlphaGo/play.py
index 9144a40..2731948 100644
--- a/AlphaGo/play.py
+++ b/AlphaGo/play.py
@@ -89,7 +89,7 @@ if __name__ == '__main__':
     pattern = "[A-Z]{1}[0-9]{1}"
     space = re.compile("\s+")

-    size = 9
+    size = {"go": 9, "reversi": 8}
     show = ['.', 'X', 'O']
     evaluate_rounds = 1
@@ -102,13 +102,13 @@ if __name__ == '__main__':
         pass_flag = [False, False]
         print("Start game {}".format(game_num))
         # end the game if both palyer chose to pass, or play too much turns
-        while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2:
+        while not (pass_flag[0] and pass_flag[1]) and num < size["reversi"] ** 2 * 2:
             turn = num % 2
             board = player[turn].run_cmd(str(num) + ' show_board')
             board = eval(board[board.index('['):board.index(']') + 1])
-            for i in range(size):
-                for j in range(size):
-                    print show[board[i * size + j]] + " ",
+            for i in range(size["reversi"]):
+                for j in range(size["reversi"]):
+                    print show[board[i * size["reversi"] + j]] + " ",
             print "\n",
             data.boards.append(board)
             start_time = time.time()
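Note: the model.py hunks flatten each history board before the elementwise comparison, so a 2-D Reversi board and a flat Go board yield identical binary feature planes. A self-contained sketch of the construction (to_planes is a hypothetical helper; +1/-1 encode black/white as elsewhere in this patch):

import numpy as np

def to_planes(board, board_size):
    # Accept either a flat list (Go) or a 2-D ndarray (Reversi).
    flat = np.array(board).flatten()
    black = (flat == 1).reshape(board_size, board_size)   # black-stone plane
    white = (flat == -1).reshape(board_size, board_size)  # white-stone plane
    return black.astype(np.float32), white.astype(np.float32)

black, white = to_planes(np.array([[1, -1], [0, 1]]), 2)
assert black.tolist() == [[1.0, 0.0], [0.0, 1.0]]
assert white.tolist() == [[0.0, 1.0], [0.0, 0.0]]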
diff --git a/AlphaGo/reversi.py b/AlphaGo/reversi.py
index 4fa1468..c6c8a5b 100644
--- a/AlphaGo/reversi.py
+++ b/AlphaGo/reversi.py
@@ -1,4 +1,5 @@
 import numpy as np
+import copy

 '''
 Settings of the Reversi game.
@@ -8,13 +9,8 @@


 class Reversi:
-    def __init__(self, black=None, white=None):
-        self.board = None  # 8 * 8 board with 1 for black, -1 for white and 0 for blank
-        self.color = None  # 1 for black and -1 for white
-        self.action = None  # number in 0~63
-        self.winner = None
-        self.black_win = None
-        self.size = 8
+    def __init__(self, **kwargs):
+        self.size = kwargs['size']

     def _deflatten(self, idx):
         x = idx // self.size + 1
@@ -24,39 +20,39 @@ class Reversi:
     def _flatten(self, vertex):
         x, y = vertex
         if (x == 0) and (y == 0):
-            return 64
+            return self.size ** 2
         return (x - 1) * self.size + (y - 1)

-    def get_board(self, board=None):
-        self.board = board or np.zeros([8,8])
-        self.board[3, 3] = -1
-        self.board[4, 4] = -1
-        self.board[3, 4] = 1
-        self.board[4, 3] = 1
-        return self.board
+    def get_board(self):
+        board = np.zeros([self.size, self.size], dtype=np.int32)
+        board[self.size // 2 - 1, self.size // 2 - 1] = -1
+        board[self.size // 2, self.size // 2] = -1
+        board[self.size // 2 - 1, self.size // 2] = 1
+        board[self.size // 2, self.size // 2 - 1] = 1
+        return board

-    def _find_correct_moves(self, is_next=False):
+    def _find_correct_moves(self, board, color, is_next=False):
         moves = []
         if is_next:
-            color = 0 - self.color
+            new_color = 0 - color
         else:
-            color = self.color
-        for i in range(64):
+            new_color = color
+        for i in range(self.size ** 2):
             x, y = self._deflatten(i)
-            valid = self._is_valid(x - 1, y - 1, color)
+            valid = self._is_valid(board, x - 1, y - 1, new_color)
             if valid:
                 moves.append(i)
         return moves

-    def _one_direction_valid(self, x, y, color):
+    def _one_direction_valid(self, board, x, y, color):
         if (x >= 0) and (x < self.size):
             if (y >= 0) and (y < self.size):
-                if self.board[x, y] == color:
+                if board[x, y] == color:
                     return True
         return False

-    def _is_valid(self, x, y, color):
-        if self.board[x, y]:
+    def _is_valid(self, board, x, y, color):
+        if board[x, y]:
             return False
         for x_direction in [-1, 0, 1]:
             for y_direction in [-1, 0, 1]:
@@ -66,20 +62,18 @@ class Reversi:
                 while True:
                     new_x += x_direction
                     new_y += y_direction
-                    if self._one_direction_valid(new_x, new_y, 0 - color):
+                    if self._one_direction_valid(board, new_x, new_y, 0 - color):
                         flag = 1
                     else:
                         break
-                if self._one_direction_valid(new_x, new_y, color) and flag:
+                if self._one_direction_valid(board, new_x, new_y, color) and flag:
                     return True
         return False

     def simulate_get_mask(self, state, action_set):
-        history_boards, color = state
-        self.board = np.reshape(history_boards[-1], (self.size, self.size))
-        self.color = color
-        valid_moves = self._find_correct_moves()
-        print(valid_moves)
+        history_boards, color = copy.deepcopy(state)
+        board = copy.deepcopy(history_boards[-1])
+        valid_moves = self._find_correct_moves(board, color)
         if not len(valid_moves):
             invalid_action_mask = action_set[0:-1]
         else:
@@ -90,34 +84,34 @@ class Reversi:
         return invalid_action_mask

     def simulate_step_forward(self, state, action):
-        self.board = state[0].copy()
-        self.board = np.reshape(self.board, (self.size, self.size))
-        self.color = state[1]
-        self.action = action
-        if self.action == 64:
-            valid_moves = self._find_correct_moves(is_next=True)
+        history_boards, color = copy.deepcopy(state)
+        board = copy.deepcopy(history_boards[-1])
+        if action == self.size ** 2:
+            valid_moves = self._find_correct_moves(board, color, is_next=True)
             if not len(valid_moves):
-                self._game_over()
-                return None, self.winner * self.color
+                winner = self._get_winner(board)
+                return None, winner * color
             else:
-                return [self.board, 0 - self.color], 0
-        self._step()
-        return [self.board, 0 - self.color], 0
+                return [history_boards, 0 - color], 0
+        new_board = self._step(board, color, action)
+        history_boards.append(new_board)
+        return [history_boards, 0 - color], 0

-    def _game_over(self):
-        black_num, white_num = self._number_of_black_and_white()
-        self.black_win = black_num - white_num
-        if self.black_win > 0:
-            self.winner = 1
-        elif self.black_win < 0:
-            self.winner = -1
+    def _get_winner(self, board):
+        black_num, white_num = self._number_of_black_and_white(board)
+        black_win = black_num - white_num
+        if black_win > 0:
+            winner = 1
+        elif black_win < 0:
+            winner = -1
         else:
-            self.winner = 0
+            winner = 0
+        return winner

-    def _number_of_black_and_white(self):
+    def _number_of_black_and_white(self, board):
         black_num = 0
         white_num = 0
-        board_list = np.reshape(self.board, self.size ** 2)
+        board_list = np.reshape(board, self.size ** 2)
         for i in range(len(board_list)):
             if board_list[i] == 1:
                 black_num += 1
@@ -125,19 +119,18 @@ class Reversi:
                 white_num += 1
         return black_num, white_num

-    def _step(self):
-        if self.action < 0 or self.action > 63:
-            raise ValueError("Action not in the range of [0,63]!")
-        if self.action is None:
-            raise ValueError("Action is None!")
-        x, y = self._deflatten(self.action)
-        valid = self._flip(x -1, y - 1)
-        if not valid:
-            raise ValueError("Illegal action!")
+    def _step(self, board, color, action):
+        if action is None:
+            raise ValueError("Action is None!")
+        if action < 0 or action > self.size ** 2 - 1:
+            raise ValueError("Action not on the board!")
+        x, y = self._deflatten(action)
+        new_board = self._flip(board, x - 1, y - 1, color)
+        return new_board

-    def _flip(self, x, y):
+    def _flip(self, board, x, y, color):
         valid = 0
-        self.board[x, y] = self.color
+        board[x, y] = color
         for x_direction in [-1, 0, 1]:
             for y_direction in [-1, 0, 1]:
                 new_x = x
@@ -146,47 +139,44 @@ class Reversi:
                 while True:
                     new_x += x_direction
                     new_y += y_direction
-                    if self._one_direction_valid(new_x, new_y, 0 - self.color):
+                    if self._one_direction_valid(board, new_x, new_y, 0 - color):
                         flag = 1
                     else:
                         break
-                if self._one_direction_valid(new_x, new_y, self.color) and flag:
+                if self._one_direction_valid(board, new_x, new_y, color) and flag:
                     valid = 1
                     flip_x = x
                     flip_y = y
                     while True:
                         flip_x += x_direction
                         flip_y += y_direction
-                        if self._one_direction_valid(flip_x, flip_y, 0 - self.color):
-                            self.board[flip_x, flip_y] = self.color
+                        if self._one_direction_valid(board, flip_x, flip_y, 0 - color):
+                            board[flip_x, flip_y] = color
                         else:
                             break
         if valid:
-            return True
+            return board
         else:
-            return False
+            raise ValueError("Invalid action")

-    def executor_do_move(self, board, color, vertex):
-        self.board = np.reshape(board, (self.size, self.size))
-        self.color = color
-        self.action = self._flatten(vertex)
-        if self.action == 64:
-            valid_moves = self._find_correct_moves(is_next=True)
+    def executor_do_move(self, history, latest_boards, board, color, vertex):
+        board = np.reshape(board, (self.size, self.size))
+        action = self._flatten(vertex)
+        if action == self.size ** 2:
+            valid_moves = self._find_correct_moves(board, color, is_next=True)
             if not len(valid_moves):
                 return False
             else:
                 return True
         else:
-            self._step()
+            new_board = self._step(board, color, action)
+            history.append(new_board)
+            latest_boards.append(new_board)
             return True

     def executor_get_score(self, board):
-        self.board = board
-        self._game_over()
-        if self.black_win is not None:
-            return self.black_win
-        else:
-            raise ValueError("Game not finished!")
+        winner = self._get_winner(board)
+        return winner

 if __name__ == "__main__":
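Note: Reversi no longer keeps self.board/self.color/self.action between calls; every method now works on an explicit (board, color) pair, so MCTS rollouts cannot corrupt engine state. A hedged usage sketch against the patched class (assumes AlphaGo/ is importable; the expected move list follows from get_board()'s initial position):

import numpy as np
from reversi import Reversi

game = Reversi(size=8)
board = game.get_board()              # 8x8 int32 ndarray, four centre stones

# Black (+1) to move: the four classic opening squares, as flat indices.
moves = game._find_correct_moves(board, 1)
assert sorted(moves) == [19, 26, 37, 44]

# Advance one ply; simulate_step_forward deep-copies its input state,
# so `board` itself is left untouched.
(history, next_color), reward = game.simulate_step_forward([[board], 1], moves[0])
assert next_color == -1 and reward == 0 and len(history) == 2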
diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py
index e565337..493cf7d 100644
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@@ -110,15 +110,15 @@ class ActionNode(object):
         self.reward = 0

     def type_conversion_to_tuple(self):
-        if type(self.next_state) is np.ndarray:
+        if isinstance(self.next_state, np.ndarray):
             self.next_state = self.next_state.tolist()
-        if type(self.next_state) is list:
+        if isinstance(self.next_state, list):
             self.next_state = list2tuple(self.next_state)

     def type_conversion_to_origin(self):
-        if self.state_type is np.ndarray:
+        if issubclass(self.state_type, np.ndarray):
             self.next_state = np.array(self.next_state)
-        if self.state_type is list:
+        if issubclass(self.state_type, list):
             self.next_state = tuple2list(self.next_state)

     def selection(self, simulator):
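Note: the two conversion helpers exist because MCTS keys its search tree on states, and lists/ndarrays are not hashable; the corrected checks test the live instance on the way in and the remembered type object on the way out. A minimal round-trip sketch (list2tuple/tuple2list stand in for the helpers of the same name in mcts.py):

import numpy as np

def list2tuple(obj):
    # Recursively freeze nested lists into tuples so they become hashable.
    return tuple(list2tuple(x) for x in obj) if isinstance(obj, list) else obj

def tuple2list(obj):
    # Inverse: thaw nested tuples back into lists.
    return [tuple2list(x) for x in obj] if isinstance(obj, tuple) else obj

state = np.arange(4).reshape(2, 2)
state_type = type(state)                 # remembered type object

key = list2tuple(state.tolist())         # hashable key for the tree's dict
assert key == ((0, 1), (2, 3))

restored = tuple2list(key)
if issubclass(state_type, np.ndarray):   # type object, hence issubclass
    restored = np.array(restored)
assert (restored == state).all()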