Replace two isolated player processes with two different sets of variables in the TF graph

This commit is contained in:
Dong Yan 2018-01-10 23:27:17 +08:00
parent f425085e0a
commit 5482815de6
4 changed files with 26 additions and 128 deletions

View File

@ -29,11 +29,8 @@ class Game:
currently leave it untouched for interacting with Go UI. currently leave it untouched for interacting with Go UI.
''' '''
def __init__(self, name=None, role=None, debug=False, black_checkpoint_path=None, white_checkpoint_path=None): def __init__(self, name=None, debug=False, black_checkpoint_path=None, white_checkpoint_path=None):
self.name = name self.name = name
if role is None:
raise ValueError("Need a role!")
self.role = role
self.debug = debug self.debug = debug
if self.name == "go": if self.name == "go":
self.size = 9 self.size = 9
@ -41,7 +38,7 @@ class Game:
self.history_length = 8 self.history_length = 8
self.history = [] self.history = []
self.history_set = set() self.history_set = set()
self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role) self.game_engine = go.Go(size=self.size, komi=self.komi)
self.board = [utils.EMPTY] * (self.size ** 2) self.board = [utils.EMPTY] * (self.size ** 2)
elif self.name == "reversi": elif self.name == "reversi":
self.size = 8 self.size = 8
@ -76,20 +73,22 @@ class Game:
self.komi = k self.komi = k
def think(self, latest_boards, color): def think(self, latest_boards, color):
if color == +1: if color == utils.BLACK:
role = 'black' role = 'black'
if color == -1: elif color == utils.WHITE:
role = 'white' role = 'white'
else:
raise ValueError("game.py[think] - unknown color : {}".format(color))
evaluator = lambda state:self.model(role, state) evaluator = lambda state:self.model(role, state)
mcts = MCTS(self.game_engine, evaluator, [latest_boards, color], mcts = MCTS(self.game_engine, evaluator, [latest_boards, color],
self.size ** 2 + 1, role=self.role, debug=self.debug, inverse=True) self.size ** 2 + 1, role=role, debug=self.debug, inverse=True)
mcts.search(max_step=100) mcts.search(max_step=100)
if self.debug: if self.debug:
file = open("mcts_debug.log", 'ab') file = open("mcts_debug.log", 'ab')
np.savetxt(file, mcts.root.Q, header="\n" + self.role + " Q value : ", fmt='%.4f', newline=", ") np.savetxt(file, mcts.root.Q, header="\n" + role + " Q value : ", fmt='%.4f', newline=", ")
np.savetxt(file, mcts.root.W, header="\n" + self.role + " W value : ", fmt='%.4f', newline=", ") np.savetxt(file, mcts.root.W, header="\n" + role + " W value : ", fmt='%.4f', newline=", ")
np.savetxt(file, mcts.root.N, header="\n" + self.role + " N value : ", fmt="%d", newline=", ") np.savetxt(file, mcts.root.N, header="\n" + role + " N value : ", fmt="%d", newline=", ")
np.savetxt(file, mcts.root.prior, header="\n" + self.role + " prior : ", fmt='%.4f', newline=", ") np.savetxt(file, mcts.root.prior, header="\n" + role + " prior : ", fmt='%.4f', newline=", ")
file.close() file.close()
temp = 1 temp = 1
prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp) prob = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
@ -140,6 +139,6 @@ class Game:
if __name__ == "__main__": if __name__ == "__main__":
game = Game(name="reversi", role="black", checkpoint_path=None) game = Game(name="reversi", checkpoint_path=None)
game.debug = True game.debug = True
game.think_play_move(utils.BLACK) game.think_play_move(utils.BLACK)

View File

@ -18,7 +18,6 @@ class Go:
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.size = kwargs['size'] self.size = kwargs['size']
self.komi = kwargs['komi'] self.komi = kwargs['komi']
self.role = kwargs['role']
def _flatten(self, vertex): def _flatten(self, vertex):
x, y = vertex x, y = vertex
@ -332,7 +331,7 @@ class Go:
if __name__ == "__main__": if __name__ == "__main__":
go = Go(size=9, komi=3.75, role = utils.BLACK) go = Go(size=9, komi=3.75)
endgame = [ endgame = [
1, 0, 1, 0, 1, 1, -1, 0, -1, 1, 0, 1, 0, 1, 1, -1, 0, -1,
1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1,

View File

@ -1,10 +1,10 @@
import argparse import argparse
import subprocess
import sys import sys
import re import re
import Pyro4
import time import time
import os import os
from game import Game
from engine import GTPEngine
import utils import utils
from time import gmtime, strftime from time import gmtime, strftime
@ -48,65 +48,13 @@ if __name__ == '__main__':
if args.white_weight_path is not None and (not os.path.exists(args.white_weight_path)): if args.white_weight_path is not None and (not os.path.exists(args.white_weight_path)):
raise ValueError("Can't find the network weights for white player.") raise ValueError("Can't find the network weights for white player.")
# kill the old server game = Game(name=args.game,
# kill_old_server = subprocess.Popen(['killall', 'pyro4-ns']) black_checkpoint_path=args.black_weight_path,
# print "kill the old pyro4 name server, the return code is : " + str(kill_old_server.wait()) white_checkpoint_path=args.white_weight_path,
# time.sleep(1) debug=args.debug)
engine = GTPEngine(game_obj=game, name='tianshou', version=0)
# start a name server if no name server exists
if len(os.popen('ps aux | grep pyro4-ns | grep -v grep').readlines()) == 0:
start_new_server = subprocess.Popen(['pyro4-ns', '&'])
print("Start Name Sever : " + str(start_new_server.pid)) # + str(start_new_server.wait())
time.sleep(1)
# start two different player with different network weights.
server_list = subprocess.check_output(['pyro4-nsc', 'list'])
current_time = strftime("%Y%m%d_%H%M%S", gmtime())
black_role_name = 'black' + current_time
white_role_name = 'white' + current_time
black_player = subprocess.Popen(
['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name,
'--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
bp_output = black_player.stdout.readline()
bp_message = bp_output
# '' means player.py failed to start, "Start requestLoop" means player.py start successfully
while bp_output != '' and "Start requestLoop" not in bp_output:
bp_output = black_player.stdout.readline()
bp_message += bp_output
print("============ " + black_role_name + " message ============" + "\n" + bp_message),
white_player = subprocess.Popen(
['python', '-u', 'player.py', '--game=' + args.game, '--role=' + white_role_name,
'--checkpoint_path=' + str(args.white_weight_path), '--debug=' + str(args.debug)],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
wp_output = white_player.stdout.readline()
wp_message = wp_output
while wp_output != '' and "Start requestLoop" not in wp_output:
wp_output = white_player.stdout.readline()
wp_message += wp_output
print("============ " + white_role_name + " message ============" + "\n" + wp_message),
server_list = ""
while (black_role_name not in server_list) or (white_role_name not in server_list):
if python_version < (3, 0):
# TODO : @renyong what is the difference between those two options?
server_list = subprocess.check_output(['pyro4-nsc', 'list'])
else:
server_list = subprocess.check_output(['pyro4-nsc', 'list'])
print("Waiting for the server start...")
time.sleep(1)
print(server_list)
print("Start black player at : " + str(black_player.pid))
print("Start white player at : " + str(white_player.pid))
data = Data() data = Data()
player = [None] * 2
player[0] = Pyro4.Proxy("PYRONAME:" + black_role_name)
player[1] = Pyro4.Proxy("PYRONAME:" + white_role_name)
role = ["BLACK", "WHITE"] role = ["BLACK", "WHITE"]
color = ['b', 'w'] color = ['b', 'w']
@ -119,7 +67,7 @@ if __name__ == '__main__':
game_num = 0 game_num = 0
try: try:
while True: while True:
# while game_num < evaluate_rounds: #while game_num < evaluate_rounds:
start_time = time.time() start_time = time.time()
num = 0 num = 0
pass_flag = [False, False] pass_flag = [False, False]
@ -127,7 +75,7 @@ if __name__ == '__main__':
# end the game if both palyer chose to pass, or play too much turns # end the game if both palyer chose to pass, or play too much turns
while not (pass_flag[0] and pass_flag[1]) and num < size[args.game] ** 2 * 2: while not (pass_flag[0] and pass_flag[1]) and num < size[args.game] ** 2 * 2:
turn = num % 2 turn = num % 2
board = player[turn].run_cmd(str(num) + ' show_board') board = engine.run_cmd(str(num) + ' show_board')
board = eval(board[board.index('['):board.index(']') + 1]) board = eval(board[board.index('['):board.index(']') + 1])
for i in range(size[args.game]): for i in range(size[args.game]):
for j in range(size[args.game]): for j in range(size[args.game]):
@ -135,7 +83,7 @@ if __name__ == '__main__':
print "\n", print "\n",
data.boards.append(board) data.boards.append(board)
start_time = time.time() start_time = time.time()
move = player[turn].run_cmd(str(num) + ' genmove ' + color[turn])[:-1] move = engine.run_cmd(str(num) + ' genmove ' + color[turn])[:-1]
print("\n" + role[turn] + " : " + str(move)), print("\n" + role[turn] + " : " + str(move)),
num += 1 num += 1
match = re.search(pattern, move) match = re.search(pattern, move)
@ -147,21 +95,19 @@ if __name__ == '__main__':
# print "no match" # print "no match"
play_or_pass = ' PASS' play_or_pass = ' PASS'
pass_flag[turn] = True pass_flag[turn] = True
result = player[1 - turn].run_cmd(str(num) + ' play ' + color[turn] + ' ' + play_or_pass + '\n') prob = engine.run_cmd(str(num) + ' get_prob')
prob = player[turn].run_cmd(str(num) + ' get_prob')
prob = space.sub(',', prob[prob.index('['):prob.index(']') + 1]) prob = space.sub(',', prob[prob.index('['):prob.index(']') + 1])
prob = prob.replace('[,', '[') prob = prob.replace('[,', '[')
prob = prob.replace('],', ']') prob = prob.replace('],', ']')
prob = eval(prob) prob = eval(prob)
data.probs.append(prob) data.probs.append(prob)
score = player[0].run_cmd(str(num) + ' get_score') score = engine.run_cmd(str(num) + ' get_score')
print("Finished : {}".format(score.split(" ")[1])) print("Finished : {}".format(score.split(" ")[1]))
if eval(score.split(" ")[1]) > 0: if eval(score.split(" ")[1]) > 0:
data.winner = utils.BLACK data.winner = utils.BLACK
if eval(score.split(" ")[1]) < 0: if eval(score.split(" ")[1]) < 0:
data.winner = utils.WHITE data.winner = utils.WHITE
player[0].run_cmd(str(num) + ' clear_board') engine.run_cmd(str(num) + ' clear_board')
player[1].run_cmd(str(num) + ' clear_board')
file_list = os.listdir(args.data_path) file_list = os.listdir(args.data_path)
current_time = strftime("%Y%m%d_%H%M%S", gmtime()) current_time = strftime("%Y%m%d_%H%M%S", gmtime())
if os.path.exists(args.data_path + current_time + ".pkl"): if os.path.exists(args.data_path + current_time + ".pkl"):
@ -173,7 +119,3 @@ if __name__ == '__main__':
game_num += 1 game_num += 1
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
subprocess.call(["kill", "-9", str(black_player.pid)])
subprocess.call(["kill", "-9", str(white_player.pid)])
print("Kill all player, finish all game.")

View File

@ -1,42 +0,0 @@
import argparse
import Pyro4
from game import Game
from engine import GTPEngine
@Pyro4.expose
class Player(object):
"""
This is the class which defines the object called by Pyro4 (Python remote object).
It passes the command to our engine, and return the result.
"""
def __init__(self, **kwargs):
self.role = kwargs['role']
self.engine = kwargs['engine']
def run_cmd(self, command):
return self.engine.run_cmd(command)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint_path", type=str, default="None")
parser.add_argument("--role", type=str, default="unknown")
parser.add_argument("--debug", type=str, default="False")
parser.add_argument("--game", type=str, default="go")
args = parser.parse_args()
if args.checkpoint_path == 'None':
args.checkpoint_path = None
game = Game(name=args.game, role=args.role,
checkpoint_path=args.checkpoint_path,
debug=eval(args.debug))
engine = GTPEngine(game_obj=game, name='tianshou', version=0)
daemon = Pyro4.Daemon() # make a Pyro daemon
ns = Pyro4.locateNS() # find the name server
player = Player(role=args.role, engine=engine)
print("Init " + args.role + " player finished")
uri = daemon.register(player) # register the greeting maker as a Pyro object
print("Start on name " + args.role)
ns.register(args.role, uri) # register the object with a name in the name server
print("Start requestLoop " + str(uri))
daemon.requestLoop() # start the event loop of the server to wait for calls