diff --git a/AlphaGo/play.py b/AlphaGo/play.py index 038953f..7c7961c 100644 --- a/AlphaGo/play.py +++ b/AlphaGo/play.py @@ -24,7 +24,6 @@ class Data(object): def reset(self): self.__init__() - if __name__ == '__main__': """ Starting two different players which load network weights to evaluate the winning ratio. @@ -81,13 +80,13 @@ if __name__ == '__main__': black_role_name = 'black' + str(args.id) white_role_name = 'white' + str(args.id) - #TODO : check if we can get the output of player from the stdout, for debug convenience black_player = subprocess.Popen( ['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name, '--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) bp_output = black_player.stdout.readline() bp_message = bp_output + # '' means player.py failed to start, "Start requestLoop" means player.py started successfully while bp_output != '' and "Start requestLoop" not in bp_output: bp_output = black_player.stdout.readline() bp_message += bp_output diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py index 3d547c6..f64b5a0 100644 --- a/tianshou/core/mcts/mcts.py +++ b/tianshou/core/mcts/mcts.py @@ -97,7 +97,6 @@ class ActionNode(object): self.action = action self.children = {} self.next_state = None - self.origin_state = None self.state_type = None self.reward = 0 self.mcts = mcts @@ -118,18 +117,15 @@ class ActionNode(object): head = time.time() self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action) self.mcts.simulate_sf_time += time.time() - head + if self.next_state is None: # next_state is None means that self.parent.state is the terminal state + self.mcts.action_selection_time += time.time() - head + return self.parent, self.action self.origin_state = self.next_state - self.state_type = type(self.next_state) self.type_conversion_to_tuple() - if self.next_state is not None: 
- if self.next_state in self.children.keys(): - self.mcts.action_selection_time += time.time() - head - return self.children[self.next_state].selection(simulator) - else: - self.mcts.action_selection_time += time.time() - head - return self.parent, self.action - else: - # self.next_state is None means we have reach the terminate state + if self.next_state in self.children.keys(): # next state has already been visited + self.mcts.action_selection_time += time.time() - head + return self.children[self.next_state].selection(simulator) + else: # next state is a new state never seen before self.mcts.action_selection_time += time.time() - head return self.parent, self.action