Rewrite the selection function of ActionNode for clarity; add and remove some comments

2017-12-27 11:43:04 +08:00 · 2017-12-27 11:43:04 +08:00 · a1f6044cba
commit a1f6044cba
parent c788b253fb
2 changed files with 8 additions and 13 deletions
--- a/AlphaGo/play.py
+++ b/AlphaGo/play.py
@ -24,7 +24,6 @@ class Data(object):
    def reset(self):
        self.__init__()

-
 if __name__ == '__main__':
    """
    Starting two different players which load network weights to evaluate the winning ratio.
@ -81,13 +80,13 @@ if __name__ == '__main__':
    black_role_name = 'black' + str(args.id)
    white_role_name = 'white' + str(args.id)

-    #TODO : check if we can get the output of player from the stdout, for debug convenience
    black_player = subprocess.Popen(
        ['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name,
         '--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    bp_output = black_player.stdout.readline()
    bp_message = bp_output
+    # '' means player.py failed to start; "Start requestLoop" means player.py started successfully
    while bp_output != '' and "Start requestLoop" not in bp_output:
        bp_output = black_player.stdout.readline()
        bp_message += bp_output
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@ -97,7 +97,6 @@ class ActionNode(object):
        self.action = action
        self.children = {}
        self.next_state = None
-        self.origin_state = None
        self.state_type = None
        self.reward = 0
        self.mcts = mcts
@ -118,18 +117,15 @@ class ActionNode(object):
        head = time.time()
        self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action)
        self.mcts.simulate_sf_time += time.time() - head
+        if self.next_state is None: # next_state is None means that self.parent.state is a terminal state
+            self.mcts.action_selection_time += time.time() - head
+            return self.parent, self.action
        self.origin_state = self.next_state
-        self.state_type = type(self.next_state)
        self.type_conversion_to_tuple()
-        if self.next_state is not None:
-            if self.next_state in self.children.keys():
-                self.mcts.action_selection_time += time.time() - head
-                return self.children[self.next_state].selection(simulator)
-            else:
-                self.mcts.action_selection_time += time.time() - head
-                return self.parent, self.action
-        else:
-            # self.next_state is None means we have reach the terminate state
+        if self.next_state in self.children.keys(): # next state has already been visited
+            self.mcts.action_selection_time += time.time() - head
+            return self.children[self.next_state].selection(simulator)
+        else: # next state is a new state never seen before
            self.mcts.action_selection_time += time.time() - head
            return self.parent, self.action