diff --git a/AlphaGo/play.py b/AlphaGo/play.py
index 038953f..7c7961c 100644
--- a/AlphaGo/play.py
+++ b/AlphaGo/play.py
@@ -24,7 +24,6 @@ class Data(object):
     def reset(self):
         self.__init__()
 
-
 if __name__ == '__main__':
     """
     Starting two different players which load network weights to evaluate the winning ratio.
@@ -81,13 +80,13 @@ if __name__ == '__main__':
     black_role_name = 'black' + str(args.id)
     white_role_name = 'white' + str(args.id)
 
-    #TODO : check if we can get the output of player from the stdout, for debug convenience
     black_player = subprocess.Popen(
         ['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name,
          '--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)],
         stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     bp_output = black_player.stdout.readline()
     bp_message = bp_output
+    # '' (EOF from readline) means player.py failed to start; "Start requestLoop" means player.py started successfully
     while bp_output != '' and "Start requestLoop" not in bp_output:
         bp_output = black_player.stdout.readline()
         bp_message += bp_output
diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py
index 3d547c6..f64b5a0 100644
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@@ -97,7 +97,6 @@ class ActionNode(object):
         self.action = action
         self.children = {}
         self.next_state = None
-        self.origin_state = None
         self.state_type = None
         self.reward = 0
         self.mcts = mcts
@@ -118,18 +117,15 @@ class ActionNode(object):
         head = time.time()
         self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action)
         self.mcts.simulate_sf_time += time.time() - head
+        if self.next_state is None: # next_state is None means that self.parent.state is a terminal state
+            self.mcts.action_selection_time += time.time() - head
+            return self.parent, self.action
         self.origin_state = self.next_state
-        self.state_type = type(self.next_state)
         self.type_conversion_to_tuple()
-        if self.next_state is not None:
-            if self.next_state in self.children.keys():
-                self.mcts.action_selection_time += time.time() - head
-                return self.children[self.next_state].selection(simulator)
-            else:
-                self.mcts.action_selection_time += time.time() - head
-                return self.parent, self.action
-        else:
-            # self.next_state is None means we have reach the terminate state
+        if self.next_state in self.children.keys(): # next state has already been visited
+            self.mcts.action_selection_time += time.time() - head
+            return self.children[self.next_state].selection(simulator)
+        else: # next state is a new state never seen before
             self.mcts.action_selection_time += time.time() - head
             return self.parent, self.action