Tianshou/AlphaGo/play.py

from __future__ import division
import argparse
import sys
import re
import time
import os
import threading
from game import Game
from engine import GTPEngine
from utils import Data
import utils
from time import gmtime, strftime

# Remember which interpreter is running; the pickle import below depends on it.
python_version = sys.version_info

# The pickling module is named ``_pickle`` on Python 3 and ``cPickle`` on
# Python 2; either way the rest of the file refers to it as ``cPickle``.
if python_version >= (3, 0):
    import _pickle as cPickle
else:
    import cPickle


def _bracketed(text):
    """Return the part of ``text`` from the first '[' through the first ']'."""
    return text[text.index('['):text.index(']') + 1]


def _render_board(board, side, symbols):
    """Format the flat ``board`` list as ``side`` text rows of ``side`` cells each."""
    rows = []
    for row_index in range(side):
        row = board[row_index * side:(row_index + 1) * side]
        rows.append("".join(symbols[cell] + " " for cell in row))
    return "\n".join(rows) + "\n"


def play(engine, data_path):
    """Play a fixed number of games through ``engine`` and record each one.

    For every game the board and the move probabilities returned by the
    engine are collected in a ``Data`` object, which is pickled to
    ``<data_path>/<UTC timestamp>.pkl`` once the game ends. The wall-clock
    time of each game, followed by the average, is written to ``time.txt``
    in the current working directory.

    :param engine: object exposing ``run_cmd(str)`` and ``_game`` (with
        ``name`` and ``model.check_latest_model()``).
    :param data_path: directory in which the per-game pickle files are stored.
    """
    data = Data()
    role = ["BLACK", "WHITE"]
    color = ['b', 'w']

    # A move that names a board vertex (e.g. "D4"); anything else is
    # treated as a pass.
    pattern = re.compile(r"[A-Z]{1}[0-9]{1}")
    space = re.compile(r"\s+")
    size = {"go": 9, "reversi": 8}
    show = ['.', 'X', 'O']

    evaluate_rounds = 5
    total_time = 0
    # The context manager guarantees the timing log is closed even when a
    # game raises part-way through.
    with open('time.txt', 'w') as timing_log:
        for game_num in range(evaluate_rounds):
            start = time.time()
            engine._game.model.check_latest_model()
            side = size[engine._game.name]
            max_moves = side ** 2 * 2
            num = 0
            pass_flag = [False, False]
            print("Start game {}".format(game_num))
            # end the game if both players chose to pass, or after too many turns
            while not (pass_flag[0] and pass_flag[1]) and num < max_moves:
                turn = num % 2
                board = engine.run_cmd(str(num) + ' show_board')
                # NOTE(review): eval() executes whatever the engine returns.
                # Acceptable only while the engine is a trusted in-process
                # object; consider ast.literal_eval if that ever changes.
                board = eval(_bracketed(board))
                # sys.stdout.write behaves the same on Python 2 and 3, unlike
                # the print statement.
                sys.stdout.write(_render_board(board, side, show))
                data.boards.append(board)
                move = engine.run_cmd(str(num) + ' genmove ' + color[turn])[:-1]
                move_line = "\n" + role[turn] + " : " + str(move)
                if not move_line.endswith("\n"):
                    # Keep the next board from being glued to the move text.
                    move_line += "\n"
                sys.stdout.write(move_line)
                num += 1
                pass_flag[turn] = pattern.search(move) is None
                prob = engine.run_cmd(str(num) + ' get_prob')
                # The probabilities arrive whitespace-separated inside
                # brackets; turn them into a comma-separated list literal.
                prob = space.sub(',', _bracketed(prob))
                prob = prob.replace('[,', '[')
                prob = prob.replace('],', ']')
                prob = eval(prob)  # NOTE(review): same trust caveat as above.
                data.probs.append(prob)
            score = engine.run_cmd(str(num) + ' get_score')
            score_text = score.split(" ")[1]
            print("Finished : {}".format(score_text))
            score_value = eval(score_text)  # NOTE(review): trusted input only.
            if score_value > 0:
                data.winner = utils.BLACK
            elif score_value < 0:
                data.winner = utils.WHITE
            engine.run_cmd(str(num) + ' clear_board')
            current_time = strftime("%Y%m%d_%H%M%S", gmtime())
            # File names have one-second resolution; wait for the next second
            # rather than overwrite a record saved moments ago.
            if os.path.exists(os.path.join(data_path, current_time + ".pkl")):
                time.sleep(1)
                current_time = strftime("%Y%m%d_%H%M%S", gmtime())
            record_path = os.path.join(data_path, current_time + ".pkl")
            with open(record_path, "wb") as record_file:
                cPickle.dump(data, record_file)
            data.reset()

            this_time = time.time() - start
            total_time += this_time
            timing_log.write('time:' + str(this_time) + '\n')
        timing_log.write('Avg time:' + str(total_time / evaluate_rounds))
    

if __name__ == '__main__':
    # Script entry point: build the game and its GTP engine from the command
    # line options, then run the play loop in a thread, optionally alongside
    # a training thread (--train).
    # NOTE(review): an earlier note here said the Pyro4 library is required,
    # but nothing in this block uses it -- confirm and drop the requirement.
    # TODO : we should set the network path in a more configurable way.
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, default="./data/")
    parser.add_argument("--black_weight_path", type=str, default=None)
    parser.add_argument("--white_weight_path", type=str, default=None)
    parser.add_argument("--save_path", type=str, default="./go/")
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--game", type=str, default="go")
    parser.add_argument("--train", action="store_true", default=False)
    args = parser.parse_args()

    # makedirs (unlike mkdir) also creates missing parent directories, so a
    # nested --data_path works on the first run.
    if not os.path.exists(args.data_path):
        os.makedirs(args.data_path)

    # A weight path left at None is passed through to Game unchanged; a path
    # that was given explicitly must exist.
    if args.black_weight_path is not None and (not os.path.exists(args.black_weight_path)):
        raise ValueError("Can't find the network weights for black player.")
    if args.white_weight_path is not None and (not os.path.exists(args.white_weight_path)):
        raise ValueError("Can't find the network weights for white player.")

    game = Game(name=args.game,
                black_checkpoint_path=args.black_weight_path,
                white_checkpoint_path=args.white_weight_path,
                debug=args.debug)
    engine = GTPEngine(game_obj=game, name='tianshou', version=0)

    thread_list = []
    if args.train:
        # Only build the training thread when it will actually be started.
        thread_list.append(threading.Thread(
            target=game.model.train,
            args=("file",),
            kwargs={'data_path': args.data_path,
                    'batch_size': 128,
                    'save_path': args.save_path}))
    thread_list.append(threading.Thread(target=play, args=(engine, args.data_path)))

    for t in thread_list:
        t.start()

    # Block until every started thread has finished.
    for t in thread_list:
        t.join()
add deepcopy for hash, add some testing 2018-01-17 15:54:46 +08:00			`from __future__ import division`
start a random player if checkpoint path is not specified 2017-12-19 15:39:31 +08:00			`import argparse`
play 2017-12-09 21:41:11 +08:00			`import sys`
			`import re`
			`import time`
check if the network weights exists for every player 2017-12-16 14:55:19 +08:00			`import os`
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`import threading`
replace two isolated player process by two different set of variables in the tf graph 2018-01-10 23:27:17 +08:00			`from game import Game`
			`from engine import GTPEngine`
refactor code to avoid memory leak 2018-01-11 17:02:36 +08:00			`from utils import Data`
delete unused code 2017-12-26 19:29:35 +08:00			`import utils`
			`from time import gmtime, strftime`
add '()' to support python3 2017-12-25 15:33:17 +08:00
			`python_version = sys.version_info`

			`if python_version < (3, 0):`
			`import cPickle`
			`else:`
			`import _pickle as cPickle`

modify game.py for multi-player 2018-01-09 20:09:48 +08:00
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`def play(engine, data_path):`
			`data = Data()`
			`role = ["BLACK", "WHITE"]`
			`color = ['b', 'w']`

			`pattern = "[A-Z]{1}[0-9]{1}"`
			`space = re.compile("\s+")`
			`size = {"go": 9, "reversi": 8}`
			`show = ['.', 'X', 'O']`

add deepcopy for hash, add some testing 2018-01-17 15:54:46 +08:00			`evaluate_rounds = 5`
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`game_num = 0`
add deepcopy for hash, add some testing 2018-01-17 15:54:46 +08:00			`total_time = 0`
			`f=open('time.txt','w')`
add union set for do_move and is_valid 2018-01-16 14:10:56 +08:00			`#while True:`
			`while game_num < evaluate_rounds:`
add deepcopy for hash, add some testing 2018-01-17 15:54:46 +08:00			`start = time.time()`
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`engine._game.model.check_latest_model()`
			`num = 0`
			`pass_flag = [False, False]`
			`print("Start game {}".format(game_num))`
			`# end the game if both palyer chose to pass, or play too much turns`
			`while not (pass_flag[0] and pass_flag[1]) and num < size[engine._game.name] ** 2 * 2:`
			`turn = num % 2`
			`board = engine.run_cmd(str(num) + ' show_board')`
			`board = eval(board[board.index('['):board.index(']') + 1])`
			`for i in range(size[engine._game.name]):`
			`for j in range(size[engine._game.name]):`
			`print show[board[i * size[engine._game.name] + j]] + " ",`
			`print "\n",`
			`data.boards.append(board)`
			`move = engine.run_cmd(str(num) + ' genmove ' + color[turn])[:-1]`
			`print("\n" + role[turn] + " : " + str(move)),`
			`num += 1`
			`match = re.search(pattern, move)`
			`if match is not None:`
			`# print "match : " + str(match.group())`
			`pass_flag[turn] = False`
			`else:`
			`# print "no match"`
			`pass_flag[turn] = True`
			`prob = engine.run_cmd(str(num) + ' get_prob')`
			`prob = space.sub(',', prob[prob.index('['):prob.index(']') + 1])`
			`prob = prob.replace('[,', '[')`
			`prob = prob.replace('],', ']')`
			`prob = eval(prob)`
			`data.probs.append(prob)`
			`score = engine.run_cmd(str(num) + ' get_score')`
			`print("Finished : {}".format(score.split(" ")[1]))`
			`if eval(score.split(" ")[1]) > 0:`
			`data.winner = utils.BLACK`
			`if eval(score.split(" ")[1]) < 0:`
			`data.winner = utils.WHITE`
			`engine.run_cmd(str(num) + ' clear_board')`
			`current_time = strftime("%Y%m%d_%H%M%S", gmtime())`
			`if os.path.exists(data_path + current_time + ".pkl"):`
			`time.sleep(1)`
			`current_time = strftime("%Y%m%d_%H%M%S", gmtime())`
			`with open(data_path + current_time + ".pkl", "wb") as file:`
			`cPickle.dump(data, file)`
			`data.reset()`
			`game_num += 1`
add deepcopy for hash, add some testing 2018-01-17 15:54:46 +08:00
			`this_time = time.time() - start`
			`total += this_time`
			`f.write('time:'+ str(this_time)+'\n')`
			`f.write('Avg time:' + str(total/evaluate_rounds))`
			`f.close()`

add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00

start the player server in a more robost way. 2017-12-16 14:33:31 +08:00			`if __name__ == '__main__':`
check if the network weights exists for every player 2017-12-16 14:55:19 +08:00			`"""`
			`Starting two different players which load network weights to evaluate the winning ratio.`
			`Note that, this function requires the installation of the Pyro4 library.`
			`"""`
			`# TODO : we should set the network path in a more configurable way.`
start a random player if checkpoint path is not specified 2017-12-19 15:39:31 +08:00			`parser = argparse.ArgumentParser()`
modify play.py for better experience 2017-12-25 16:40:38 +08:00			`parser.add_argument("--data_path", type=str, default="./data/")`
start a random player if checkpoint path is not specified 2017-12-19 15:39:31 +08:00			`parser.add_argument("--black_weight_path", type=str, default=None)`
			`parser.add_argument("--white_weight_path", type=str, default=None)`
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`parser.add_argument("--save_path", type=str, default="./go/")`
add an args to intrigue training 2018-01-13 15:59:57 +08:00			`parser.add_argument("--debug", action="store_true", default=False)`
modify play.py for better experience 2017-12-25 16:40:38 +08:00			`parser.add_argument("--game", type=str, default="go")`
add an args to intrigue training 2018-01-13 15:59:57 +08:00			`parser.add_argument("--train", action="store_true", default=False)`
start a random player if checkpoint path is not specified 2017-12-19 15:39:31 +08:00			`args = parser.parse_args()`

modify play.py for better experience 2017-12-25 16:40:38 +08:00			`if not os.path.exists(args.data_path):`
			`os.mkdir(args.data_path)`
start a random player if checkpoint path is not specified 2017-12-19 15:39:31 +08:00			`# black_weight_path = "./checkpoints"`
			`# white_weight_path = "./checkpoints_origin"`
			`if args.black_weight_path is not None and (not os.path.exists(args.black_weight_path)):`
delete unused code 2017-12-26 19:29:35 +08:00			`raise ValueError("Can't find the network weights for black player.")`
start a random player if checkpoint path is not specified 2017-12-19 15:39:31 +08:00			`if args.white_weight_path is not None and (not os.path.exists(args.white_weight_path)):`
delete unused code 2017-12-26 19:29:35 +08:00			`raise ValueError("Can't find the network weights for white player.")`
check if the network weights exists for every player 2017-12-16 14:55:19 +08:00
replace two isolated player process by two different set of variables in the tf graph 2018-01-10 23:27:17 +08:00			`game = Game(name=args.game,`
			`black_checkpoint_path=args.black_weight_path,`
			`white_checkpoint_path=args.white_weight_path,`
			`debug=args.debug)`
			`engine = GTPEngine(game_obj=game, name='tianshou', version=0)`
play 2017-12-09 21:41:11 +08:00
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`thread_list = []`
			`thread_train = threading.Thread(target=game.model.train, args=("file",),`
			`kwargs={'data_path':args.data_path, 'batch_size':128, 'save_path':args.save_path})`
			`thread_play = threading.Thread(target=play, args=(engine, args.data_path))`
add an args to intrigue training 2018-01-13 15:59:57 +08:00			`if args.train:`
			`thread_list.append(thread_train)`
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`thread_list.append(thread_play)`
supporting self-play between different version of neural netowrks 2017-12-10 20:23:10 +08:00
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`for t in thread_list:`
			`t.start()`
play 2017-12-09 21:41:11 +08:00
add multi-thread for end-to-end training 2018-01-13 15:57:41 +08:00			`for t in thread_list:`
			`t.join()`