NOTE: the line structure of this patch was restored from a copy whose newlines
and indentation had been collapsed. Hunk line counts match the @@ headers, but
leading whitespace, the position of blank context lines and the two
whitespace-only changes are inferred. Apply with
`git apply --ignore-whitespace` (or `patch -l`) and review the result.

diff --git a/AlphaGo/.gitignore b/AlphaGo/.gitignore
index ff61326..38ff946 100644
--- a/AlphaGo/.gitignore
+++ b/AlphaGo/.gitignore
@@ -1,5 +1,5 @@
 data
 checkpoints
-checkpoints_origin
+random
 *.log
 *.txt
diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py
index b662dbd..d298aea 100644
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@@ -13,8 +13,6 @@ import utils
 
 class GTPEngine():
     def __init__(self, **kwargs):
-        self.size = 9
-        self.komi = 6.5
         try:
             self._game = kwargs['game_obj']
             self._game.clear()
@@ -143,11 +141,9 @@ class GTPEngine():
         self.disconnect = True
         return None, True
 
-    def cmd_boardsize(self, args, **kwargs):
-        if args.isdigit():
-            size = int(args)
-            self.size = size
-            self._game.set_size(size)
+    def cmd_boardsize(self, board_size, **kwargs):
+        if board_size.isdigit():
+            self._game.set_size(int(board_size))
             return None, True
         else:
             return 'non digit size', False
@@ -156,11 +152,9 @@ class GTPEngine():
         self._game.clear()
         return None, True
 
-    def cmd_komi(self, args, **kwargs):
+    def cmd_komi(self, komi, **kwargs):
         try:
-            komi = float(args)
-            self.komi = komi
-            self._game.set_komi(komi)
+            self._game.set_komi(float(komi))
             return None, True
         except ValueError:
             raise ValueError("syntax error")
diff --git a/AlphaGo/play.py b/AlphaGo/play.py
index 6b57b86..884d2ab 100644
--- a/AlphaGo/play.py
+++ b/AlphaGo/play.py
@@ -5,6 +5,8 @@ import re
 import Pyro4
 import time
 import os
+import utils
+from time import gmtime, strftime
 
 python_version = sys.version_info
 
@@ -13,8 +15,6 @@ if python_version < (3, 0):
 else:
     import _pickle as cPickle
 
-
-
 class Data(object):
     def __init__(self):
         self.boards = []
@@ -45,9 +45,9 @@ if __name__ == '__main__':
     # black_weight_path = "./checkpoints"
     # white_weight_path = "./checkpoints_origin"
     if args.black_weight_path is not None and (not os.path.exists(args.black_weight_path)):
-        raise ValueError("Can't not find the network weights for black player.")
+        raise ValueError("Can't find the network weights for black player.")
     if args.white_weight_path is not None and (not os.path.exists(args.white_weight_path)):
-        raise ValueError("Can't not find the network weights for white player.")
+        raise ValueError("Can't find the network weights for white player.")
 
     # kill the old server
     # kill_old_server = subprocess.Popen(['killall', 'pyro4-ns'])
@@ -86,27 +86,29 @@ if __name__ == '__main__':
     black_role_name = 'black' + str(args.id)
     white_role_name = 'white' + str(args.id)
 
-    agent_v0 = subprocess.Popen(
+    #TODO : check if we can get the output of player from the stdout, for debug convenience
+    black_player = subprocess.Popen(
         ['python', '-u', 'player.py', '--game=' + args.game, '--role=' + black_role_name,
          '--checkpoint_path=' + str(args.black_weight_path), '--debug=' + str(args.debug)],
         stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 
-    agent_v1 = subprocess.Popen(
+    white_player = subprocess.Popen(
         ['python', '-u', 'player.py', '--game=' + args.game, '--role=' + white_role_name,
-          '--checkpoint_path=' + str(args.white_weight_path), '--debug=' + str(args.debug)],
+         '--checkpoint_path=' + str(args.white_weight_path), '--debug=' + str(args.debug)],
         stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 
     server_list = ""
     while (black_role_name not in server_list) or (white_role_name not in server_list):
         if python_version < (3, 0):
+            # TODO : @renyong what is the difference between those two options?
             server_list = subprocess.check_output(['pyro4-nsc', 'list'])
         else:
             server_list = subprocess.check_output(['pyro4-nsc', 'list'])
         print("Waiting for the server start...")
         time.sleep(1)
     print(server_list)
-    print("Start black player at : " + str(agent_v0.pid))
-    print("Start white player at : " + str(agent_v1.pid))
+    print("Start black player at : " + str(black_player.pid))
+    print("Start white player at : " + str(white_player.pid))
 
     data = Data()
     player = [None] * 2
@@ -121,7 +123,7 @@ if __name__ == '__main__':
     size = {"go":9, "reversi":8}
     show = ['.', 'X', 'O']
 
-    evaluate_rounds = 1
+    evaluate_rounds = 100
     game_num = 0
     try:
         #while True:
@@ -141,8 +143,8 @@ if __name__ == '__main__':
                     print "\n",
                 data.boards.append(board)
                 start_time = time.time()
-                move = player[turn].run_cmd(str(num) + ' genmove ' + color[turn] + '\n')
-                print(role[turn] + " : " + str(move)),
+                move = player[turn].run_cmd(str(num) + ' genmove ' + color[turn])[:-1]
+                print("\n" + role[turn] + " : " + str(move)),
                 num += 1
                 match = re.search(pattern, move)
                 if match is not None:
@@ -160,29 +162,23 @@ if __name__ == '__main__':
                 prob = prob.replace('],', ']')
                 prob = eval(prob)
                 data.probs.append(prob)
-            score = player[turn].run_cmd(str(num) + ' get_score')
+            score = player[0].run_cmd(str(num) + ' get_score')
             print("Finished : {}".format(score.split(" ")[1]))
-            # TODO: generalize the player
             if eval(score.split(" ")[1]) > 0:
-                data.winner = 1
+                data.winner = utils.BLACK
             if eval(score.split(" ")[1]) < 0:
-                data.winner = -1
+                data.winner = utils.WHITE
             player[0].run_cmd(str(num) + ' clear_board')
             player[1].run_cmd(str(num) + ' clear_board')
             file_list = os.listdir(args.data_path)
-            if not file_list:
-                data_num = 0
-            else:
-                file_list.sort(key=lambda file: os.path.getmtime(args.data_path + file) if not os.path.isdir(
-                    args.data_path + file) else 0)
-                data_num = eval(file_list[-1][:-4]) + 1
-            with open("./data/" + str(data_num) + ".pkl", "wb") as file:
+            current_time = strftime("%Y%m%d_%H%M%S", gmtime())
+            with open(args.data_path + current_time + ".pkl", "wb") as file:
                 picklestring = cPickle.dump(data, file)
             data.reset()
             game_num += 1
     except KeyboardInterrupt:
         pass
 
-    subprocess.call(["kill", "-9", str(agent_v0.pid)])
-    subprocess.call(["kill", "-9", str(agent_v1.pid)])
+    subprocess.call(["kill", "-9", str(black_player.pid)])
+    subprocess.call(["kill", "-9", str(white_player.pid)])
     print("Kill all player, finish all game.")
diff --git a/AlphaGo/player.py b/AlphaGo/player.py
index a8f61c1..b93c124 100644
--- a/AlphaGo/player.py
+++ b/AlphaGo/player.py
@@ -1,8 +1,5 @@
 import argparse
-import time
-import sys
 import Pyro4
-
 from game import Game
 from engine import GTPEngine
 
@@ -17,10 +14,8 @@ class Player(object):
         self.engine = kwargs['engine']
 
     def run_cmd(self, command):
-        #return "inside the Player of player.py"
         return self.engine.run_cmd(command)
 
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument("--checkpoint_path", type=str, default=None)
@@ -29,12 +24,7 @@ if __name__ == '__main__':
     parser.add_argument("--game", type=str, default=False)
     args = parser.parse_args()
 
-    if args.checkpoint_path == 'None':
-        args.checkpoint_path = None
-    debug = False
-    if args.debug == "True":
-        debug = True
-    game = Game(name=args.game, role=args.role, checkpoint_path=args.checkpoint_path, debug=debug)
+    game = Game(name=args.game, role=args.role, checkpoint_path=eval(args.checkpoint_path), debug=eval(args.debug))
     engine = GTPEngine(game_obj=game, name='tianshou', version=0)
 
     daemon = Pyro4.Daemon()  # make a Pyro daemon
@@ -43,7 +33,7 @@ if __name__ == '__main__':
     print "Init " + args.role + " player finished"
     uri = daemon.register(player)  # register the greeting maker as a Pyro object
     print "Start on name " + args.role
-    ns.register(args.role, uri)   # register the object with a name in the name server
+    ns.register(args.role, uri)  # register the object with a name in the name server
 
     print "Start Request Loop " + str(uri)
     daemon.requestLoop()  # start the event loop of the server to wait for calls