rewrite selection function of ActionNode for clarity, add and delete some notes

This commit is contained in:
Dong Yan 2017-12-27 11:43:04 +08:00
parent c788b253fb
commit a1f6044cba
2 changed files with 8 additions and 13 deletions

View File

@ -24,7 +24,6 @@ class Data(object):
def reset(self):
self.__init__()
if __name__ == '__main__':
"""
Starting two different players which load network weights to evaluate the winning ratio.
@ -81,13 +80,13 @@ if __name__ == '__main__':
black_role_name = 'black' + str(args.id)
white_role_name = 'white' + str(args.id)
# TODO: check whether we can capture the player's output from stdout, for easier debugging
black_player = subprocess.Popen(
['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name,
'--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
bp_output = black_player.stdout.readline()
bp_message = bp_output
# '' means player.py failed to start; "Start requestLoop" means player.py started successfully
while bp_output != '' and "Start requestLoop" not in bp_output:
bp_output = black_player.stdout.readline()
bp_message += bp_output

View File

@ -97,7 +97,6 @@ class ActionNode(object):
self.action = action
self.children = {}
self.next_state = None
self.origin_state = None
self.state_type = None
self.reward = 0
self.mcts = mcts
@ -118,18 +117,15 @@ class ActionNode(object):
head = time.time()
self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action)
self.mcts.simulate_sf_time += time.time() - head
if self.next_state is None: # next_state is None means that self.parent.state is the terminate state
self.mcts.action_selection_time += time.time() - head
return self.parent, self.action
self.origin_state = self.next_state
self.state_type = type(self.next_state)
self.type_conversion_to_tuple()
if self.next_state is not None:
if self.next_state in self.children.keys():
self.mcts.action_selection_time += time.time() - head
return self.children[self.next_state].selection(simulator)
else:
self.mcts.action_selection_time += time.time() - head
return self.parent, self.action
else:
# self.next_state being None means we have reached the terminal state
if self.next_state in self.children.keys(): # next state has already visited before
self.mcts.action_selection_time += time.time() - head
return self.children[self.next_state].selection(simulator)
else: # next state is a new state never seen before
self.mcts.action_selection_time += time.time() - head
return self.parent, self.action