From 1ff8252e6d3fa73e32931f533601ccda81981249 Mon Sep 17 00:00:00 2001
From: rtz19970824
Date: Sat, 9 Dec 2017 21:41:11 +0800
Subject: [PATCH] play

---
 AlphaGo/engine.py        |  7 ++++
 AlphaGo/game.py          |  4 +-
 AlphaGo/multi_gpu.py     |  2 +
 AlphaGo/network_small.py | 55 ++++++++++++++-----------
 AlphaGo/play.py          | 89 ++++++++++++++++++++++++++++++++++++++++
 AlphaGo/strategy.py      |  4 +-
 AlphaGo/test.py          | 13 ++++--
 7 files changed, 143 insertions(+), 31 deletions(-)
 create mode 100644 AlphaGo/play.py

diff --git a/AlphaGo/engine.py b/AlphaGo/engine.py
index fa8ce0d..97f625b 100644
--- a/AlphaGo/engine.py
+++ b/AlphaGo/engine.py
@@ -182,6 +182,13 @@ class GTPEngine():
         else:
             return 'unknown player', False
 
+    def cmd_get_score(self, args, **kwargs):
+        return self._game.executor.get_score(), None
+
+    def cmd_show_board(self, args, **kwargs):
+        self._game.show_board()
+        return None, None
+
 
 if __name__ == "main":
     game = Game()
diff --git a/AlphaGo/game.py b/AlphaGo/game.py
index 941401e..a20c6aa 100644
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@@ -181,11 +181,11 @@ class Executor:
 
 
 class Game:
-    def __init__(self, size=9, komi=6.5):
+    def __init__(self, size=9, komi=6.5, checkpoint_path=None):
         self.size = size
         self.komi = komi
         self.board = [utils.EMPTY] * (self.size * self.size)
-        self.strategy = strategy()
+        self.strategy = strategy(checkpoint_path)
         # self.strategy = None
         self.executor = Executor(game=self)
         self.history = []
diff --git a/AlphaGo/multi_gpu.py b/AlphaGo/multi_gpu.py
index 55dfaa8..60c5f53 100644
--- a/AlphaGo/multi_gpu.py
+++ b/AlphaGo/multi_gpu.py
@@ -18,6 +18,8 @@ FLAGS = tf.flags.FLAGS
 
 
 def create_session():
     config = tf.ConfigProto(allow_soft_placement=True,
                             log_device_placement=FLAGS.log_device_placement)
+    config.gpu_options.allow_growth = True
+
     return tf.Session(config=config)
diff --git a/AlphaGo/network_small.py b/AlphaGo/network_small.py
index 975cf96..2542ec4 100644
--- a/AlphaGo/network_small.py
+++ b/AlphaGo/network_small.py
@@ -9,6 +9,7 @@
 import tensorflow.contrib.layers as layers
 import multi_gpu
 import time
+import copy
 
 # os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -80,14 +81,15 @@ class Network(object):
         self.train_op = tf.train.RMSPropOptimizer(1e-4).minimize(self.total_loss)
         self.var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
         self.saver = tf.train.Saver(max_to_keep=10, var_list=self.var_list)
+        self.sess = multi_gpu.create_session()
 
 
     def train(self):
-        data_path = "/home/tongzheng/data/"
-        data_name = os.listdir("/home/tongzheng/data/")
+        data_path = "./training_data/"
+        data_name = os.listdir(data_path)
         epochs = 100
         batch_size = 128
-        result_path = "./checkpoints/"
+        result_path = "./checkpoints_origin/"
         with multi_gpu.create_session() as sess:
             sess.run(tf.global_variables_initializer())
             ckpt_file = tf.train.latest_checkpoint(result_path)
@@ -112,7 +114,7 @@
             time_train = -time.time()
             for iter in range(batch_num):
                 lv, lp, r, value, prob, _ = sess.run(
-                    [self.value_loss, self.policy_loss, self.reg, self.v, tf.nn.softmax(p), self.train_op],
+                    [self.value_loss, self.policy_loss, self.reg, self.v, tf.nn.softmax(self.p), self.train_op],
                     feed_dict={self.x: boards[
                         index[iter * batch_size:(iter + 1) * batch_size]],
                         self.z: wins[index[
@@ -186,28 +188,35 @@
 #             # np.savetxt(pv_file, res[1][0], fmt="%.6f", newline=" ")
 #         return res
 
-    def forward(self):
+    def forward(self, checkpoint_path):
         # checkpoint_path = "/home/tongzheng/tianshou/AlphaGo/checkpoints/"
-        sess = multi_gpu.create_session()
-        sess.run(tf.global_variables_initializer())
-        # ckpt_file = tf.train.latest_checkpoint(checkpoint_path)
-        # if ckpt_file is not None:
-        #     print('Restoring model from {}...'.format(ckpt_file))
-        #     self.saver.restore(sess, ckpt_file)
-        #     print('Successfully loaded')
-        # else:
-        #     raise ValueError("No model loaded")
+        # sess = multi_gpu.create_session()
+        # sess.run(tf.global_variables_initializer())
+        ckpt_file = tf.train.latest_checkpoint(checkpoint_path)
+        if ckpt_file is not None:
+            # print('Restoring model from {}...'.format(ckpt_file))
+            self.saver.restore(self.sess, ckpt_file)
+            # print('Successfully loaded')
+        else:
+            raise ValueError("No model loaded")
         # prior, value = sess.run([tf.nn.softmax(p), v], feed_dict={x: state, is_training: False})
         # return prior, value
-        return sess
+        return self.sess
 
 
 if __name__ == '__main__':
-    state = np.random.randint(0, 1, [256, 9, 9, 17])
-    net = Network()
-    sess = net.forward()
-    start_time = time.time()
-    for i in range(100):
-        sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False})
-        print("Step {}, use time {}".format(i, time.time() - start_time))
-        start_time = time.time()
+    # state = np.random.randint(0, 1, [256, 9, 9, 17])
+    # net = Network()
+    # net.train()
+    # sess = net.forward()
+    # start_time = time.time()
+    # for i in range(100):
+    #     sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False})
+    #     print("Step {}, use time {}".format(i, time.time() - start_time))
+    #     start_time = time.time()
+    net0 = Network()
+    sess0 = net0.forward("./checkpoints/")
+    print("Loaded")
+    while True:
+        pass
+
diff --git a/AlphaGo/play.py b/AlphaGo/play.py
new file mode 100644
index 0000000..1d7f69f
--- /dev/null
+++ b/AlphaGo/play.py
@@ -0,0 +1,89 @@
+import subprocess
+import sys
+import re
+import time
+pattern = "[A-Z]{1}[0-9]{1}"
+size = 9
+agent_v1 = subprocess.Popen(['python', '-u', 'test.py'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+agent_v0 = subprocess.Popen(['python', '-u', 'test.py', '--checkpoint_path=./checkpoints_origin/'], stdin=subprocess.PIPE,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+
+num = 0
+game_num = 0
+black_pass = False
+white_pass = False
+
+
+while game_num < 10:
+    print("Start game {}".format(game_num))
+    while not (black_pass and white_pass) and num < size ** 2 * 2:
+        print(num)
+        if num % 2 == 0:
+            print('BLACK TURN')
+            agent_v1.stdin.write(str(num) + ' genmove b\n')
+            agent_v1.stdin.flush()
+            result = agent_v1.stdout.readline()
+            sys.stdout.write(result)
+            sys.stdout.flush()
+            num += 1
+            match = re.search(pattern, result)
+            print("COPY BLACK")
+            if match is not None:
+                agent_v0.stdin.write(str(num) + ' play b ' + match.group() + '\n')
+                agent_v0.stdin.flush()
+                result = agent_v0.stdout.readline()
+                sys.stdout.flush()
+            else:
+                agent_v0.stdin.write(str(num) + ' play b PASS\n')
+                agent_v0.stdin.flush()
+                result = agent_v0.stdout.readline()
+                sys.stdout.flush()
+            if re.search("pass", result) is not None:
+                black_pass = True
+            else:
+                black_pass = False
+        else:
+            print('WHITE TURN')
+            agent_v0.stdin.write(str(num) + ' genmove w\n')
+            agent_v0.stdin.flush()
+            result = agent_v0.stdout.readline()
+            sys.stdout.write(result)
+            sys.stdout.flush()
+            num += 1
+            match = re.search(pattern, result)
+            print("COPY WHITE")
+            if match is not None:
+                agent_v1.stdin.write(str(num) + ' play w ' + match.group() + '\n')
+                agent_v1.stdin.flush()
+                result = agent_v1.stdout.readline()
+                sys.stdout.flush()
+            else:
+                agent_v1.stdin.write(str(num) + ' play w PASS\n')
+                agent_v1.stdin.flush()
+                result = agent_v1.stdout.readline()
+                sys.stdout.flush()
+            if re.search("pass", result) is not None:
+                white_pass = True
+            else:
+                white_pass = False
+
+    print("Finished")
+    print("\n")
+
+    agent_v1.stdin.write('clear_board\n')
+    agent_v1.stdin.flush()
+    result = agent_v1.stdout.readline()
+    sys.stdout.flush()
+
+    agent_v0.stdin.write('clear_board\n')
+    agent_v0.stdin.flush()
+    result = agent_v0.stdout.readline()
+    sys.stdout.flush()
+
+    agent_v1.stdin.write('get_score\n')
+    agent_v1.stdin.flush()
+    result = agent_v1.stdout.readline()
+    sys.stdout.write(result)
+    sys.stdout.flush()
+    game_num += 1
diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py
index 590edb3..ff5d79f 100644
--- a/AlphaGo/strategy.py
+++ b/AlphaGo/strategy.py
@@ -224,10 +224,10 @@ class GoEnv:
 
 
 class strategy(object):
-    def __init__(self):
+    def __init__(self, checkpoint_path):
         self.simulator = GoEnv()
         self.net = network_small.Network()
-        self.sess = self.net.forward()
+        self.sess = self.net.forward(checkpoint_path)
         self.evaluator = lambda state: self.sess.run([tf.nn.softmax(self.net.p), self.net.v],
                                                      feed_dict={self.net.x: state, self.net.is_training: False})
 
diff --git a/AlphaGo/test.py b/AlphaGo/test.py
index d9a0915..16230b6 100644
--- a/AlphaGo/test.py
+++ b/AlphaGo/test.py
@@ -2,13 +2,18 @@ import sys
 from game import Game
 from engine import GTPEngine
 # import utils
+import argparse
+import time
 
-game = Game()
-engine = GTPEngine(game_obj=game, name='tianshou')
-cmd = raw_input
+parser = argparse.ArgumentParser()
+parser.add_argument("--checkpoint_path", type=str, default="./checkpoints/")
+args = parser.parse_args()
+
+game = Game(checkpoint_path=args.checkpoint_path)
+engine = GTPEngine(game_obj=game, name='tianshou', version=0)
 
 while not engine.disconnect:
-    command = cmd()
+    command = sys.stdin.readline()
     result = engine.run_cmd(command)
     sys.stdout.write(result)
     sys.stdout.flush()