rewrite selection function of ActionNode for clarity, add and delete some notes
This commit is contained in:
parent
c788b253fb
commit
a1f6044cba
@ -24,7 +24,6 @@ class Data(object):
|
||||
def reset(self):
|
||||
self.__init__()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
"""
|
||||
Starting two different players which load network weights to evaluate the winning ratio.
|
||||
@ -81,13 +80,13 @@ if __name__ == '__main__':
|
||||
black_role_name = 'black' + str(args.id)
|
||||
white_role_name = 'white' + str(args.id)
|
||||
|
||||
# TODO: check whether we can capture the player's output from stdout, for debugging convenience
|
||||
black_player = subprocess.Popen(
|
||||
['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name,
|
||||
'--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
bp_output = black_player.stdout.readline()
|
||||
bp_message = bp_output
|
||||
# '' means player.py failed to start; "Start requestLoop" means player.py started successfully
|
||||
while bp_output != '' and "Start requestLoop" not in bp_output:
|
||||
bp_output = black_player.stdout.readline()
|
||||
bp_message += bp_output
|
||||
|
@ -97,7 +97,6 @@ class ActionNode(object):
|
||||
self.action = action
|
||||
self.children = {}
|
||||
self.next_state = None
|
||||
self.origin_state = None
|
||||
self.state_type = None
|
||||
self.reward = 0
|
||||
self.mcts = mcts
|
||||
@ -118,18 +117,15 @@ class ActionNode(object):
|
||||
head = time.time()
|
||||
self.next_state, self.reward = simulator.simulate_step_forward(self.parent.state, self.action)
|
||||
self.mcts.simulate_sf_time += time.time() - head
|
||||
self.origin_state = self.next_state
|
||||
self.state_type = type(self.next_state)
|
||||
self.type_conversion_to_tuple()
|
||||
if self.next_state is not None:
|
||||
if self.next_state in self.children.keys():
|
||||
self.mcts.action_selection_time += time.time() - head
|
||||
return self.children[self.next_state].selection(simulator)
|
||||
else:
|
||||
if self.next_state is None: # next_state is None means that self.parent.state is the terminal state
|
||||
self.mcts.action_selection_time += time.time() - head
|
||||
return self.parent, self.action
|
||||
else:
|
||||
# self.next_state is None means we have reached the terminal state
|
||||
self.origin_state = self.next_state
|
||||
self.type_conversion_to_tuple()
|
||||
if self.next_state in self.children.keys(): # next state has already been visited before
|
||||
self.mcts.action_selection_time += time.time() - head
|
||||
return self.children[self.next_state].selection(simulator)
|
||||
else: # next state is a new state never seen before
|
||||
self.mcts.action_selection_time += time.time() - head
|
||||
return self.parent, self.action
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user