modify AlphaGo

This commit is contained in:
Tongzheng Ren 2017-11-08 08:32:07 +08:00
parent a38ecabc59
commit b382bd8d31
2 changed files with 108 additions and 84 deletions

View File

@@ -1,6 +1,7 @@
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import time import time
import os
import multi_gpu import multi_gpu
import tensorflow.contrib.layers as layers import tensorflow.contrib.layers as layers
@@ -55,16 +56,11 @@ train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
saver = tf.train.Saver(max_to_keep=10, var_list=var_list) saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
def train(): def train():
data = np.load("data.npz") data_path = "/home/tongzheng/data/"
boards = data["boards"] data_name = os.listdir("/home/tongzheng/data/")
wins = data["wins"]
ps = data["ps"]
print (boards.shape)
print (wins.shape)
print (ps.shape)
epochs = 100 epochs = 100
batch_size = 32 batch_size = 32
batch_num = boards.shape[0] // batch_size
result_path = "./results/" result_path = "./results/"
with multi_gpu.create_session() as sess: with multi_gpu.create_session() as sess:
sess.run(tf.global_variables_initializer()) sess.run(tf.global_variables_initializer())
@@ -73,26 +69,36 @@ def train():
print('Restoring model from {}...'.format(ckpt_file)) print('Restoring model from {}...'.format(ckpt_file))
saver.restore(sess, ckpt_file) saver.restore(sess, ckpt_file)
for epoch in range(epochs): for epoch in range(epochs):
time_train = -time.time() for name in data_name:
index = np.arange(boards.shape[0]) data = np.load(data_path + name)
np.random.shuffle(index) boards = data["boards"]
losses = [] wins = data["wins"]
regs = [] ps = data["ps"]
for iter in range(batch_num): print (boards.shape)
_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]], print (wins.shape)
z:wins[index[iter*batch_size:(iter+1)*batch_size]], print (ps.shape)
pi:ps[index[iter*batch_size:(iter+1)*batch_size]], batch_num = boards.shape[0] // batch_size
is_training:True}) index = np.arange(boards.shape[0])
losses.append(l) np.random.shuffle(index)
regs.append(r) losses = []
if iter % 1 == 0: regs = []
print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs)))) time_train = -time.time()
time_train=-time.time() for iter in range(batch_num):
losses = [] _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
regs = [] z:wins[index[iter*batch_size:(iter+1)*batch_size]],
if iter % 20 == 0: pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter) is_training:True})
saver.save(sess, result_path + save_path) losses.append(l)
regs.append(r)
if iter % 1 == 0:
print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
time_train=-time.time()
losses = []
regs = []
if iter % 20 == 0:
save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
saver.save(sess, result_path + save_path)
del data, boards, wins, ps
def forward(board): def forward(board):
result_path = "./results/" result_path = "./results/"
@@ -106,5 +112,5 @@ def forward(board):
raise ValueError("No model loaded") raise ValueError("No model loaded")
return sess.run([p,v], feed_dict={x:board}) return sess.run([p,v], feed_dict={x:board})
if __name__='main': if __name__=="__main__":
train() train()

View File

@@ -1,65 +1,83 @@
import os import os
import threading
import numpy as np import numpy as np
path = "/raid/tongzheng/AG/self_play_204/" path = "/home/yama/leela-zero/data/npz-files/"
name = os.listdir(path) name = os.listdir(path)
boards = np.zeros([0, 19, 19, 17]) print(len(name))
wins = np.zeros([0, 1]) thread_num = 17
ps = np.zeros([0, 362]) batch_num = len(name) // thread_num
for n in name: def integrate(name, index):
data = np.load(path + n) boards = np.zeros([0, 19, 19, 17])
board = data["boards"] wins = np.zeros([0, 1])
win = data["win"] ps = np.zeros([0, 362])
p = data["p"] for n in name:
# board = np.zeros([0, 19, 19, 17]) data = np.load(path + n)
# win = np.zeros([0, 1]) board = data["boards"]
# p = np.zeros([0, 362]) win = data["win"]
# for i in range(data["boards"].shape[3]): p = data["p"]
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0) # board = np.zeros([0, 19, 19, 17])
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0) # win = np.zeros([0, 1])
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0) # p = np.zeros([0, 362])
boards = np.concatenate([boards, board], axis=0) # for i in range(data["boards"].shape[3]):
wins = np.concatenate([wins, win], axis=0) # board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
ps = np.concatenate([ps, p], axis=0) # win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
print("Finish " + n) # p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win], axis=0)
ps = np.concatenate([ps, p], axis=0)
# print("Finish " + n)
print ("Integration {} Finished!".format(index))
board_ori = boards
win_ori = wins
p_ori = ps
for i in range(1, 3):
board = np.rot90(board_ori, i, (1, 2))
p = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board_ori = boards board = board_ori[:, ::-1]
win_ori = wins p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
p_ori = ps axis=1)
for i in range(1, 3): boards = np.concatenate([boards, board], axis=0)
board = np.rot90(board_ori, i, (1, 2)) wins = np.concatenate([wins, win_ori], axis=0)
p = np.concatenate( ps = np.concatenate([ps, p], axis=0)
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, ::-1] board = board_ori[:, :, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1) p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
boards = np.concatenate([boards, board], axis=0) axis=1)
wins = np.concatenate([wins, win_ori], axis=0) boards = np.concatenate([boards, board], axis=0)
ps = np.concatenate([ps, p], axis=0) wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1] board = board_ori[:, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], p = np.concatenate(
axis=1) [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
boards = np.concatenate([boards, board], axis=0) axis=1)
wins = np.concatenate([wins, win_ori], axis=0) boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
ps = np.concatenate([ps, p], axis=0) wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, ::-1] board = board_ori[:, :, ::-1]
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1) p = np.concatenate(
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0) [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
wins = np.concatenate([wins, win_ori], axis=0) p_ori[:, -1].reshape(-1, 1)],
ps = np.concatenate([ps, p], axis=0) axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1] np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps)
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], print ("Thread {} has finished.".format(index))
axis=1) thread_list = list()
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0) for i in range(thread_num):
wins = np.concatenate([wins, win_ori], axis=0) thread_list.append(threading.Thread(target=integrate, args=(name[batch_num * i:batch_num * (i + 1)], i,)))
ps = np.concatenate([ps, p], axis=0) for thread in thread_list:
thread.start()
np.savez("data", boards=boards, wins=wins, ps=ps) for thread in thread_list:
thread.join()