modify AlphaGo
This commit is contained in:
parent
a38ecabc59
commit
b382bd8d31
@ -1,6 +1,7 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import time
|
import time
|
||||||
|
import os
|
||||||
import multi_gpu
|
import multi_gpu
|
||||||
import tensorflow.contrib.layers as layers
|
import tensorflow.contrib.layers as layers
|
||||||
|
|
||||||
@ -55,16 +56,11 @@ train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
|
|||||||
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
|
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
|
||||||
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
|
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
|
||||||
def train():
|
def train():
|
||||||
data = np.load("data.npz")
|
data_path = "/home/tongzheng/data/"
|
||||||
boards = data["boards"]
|
data_name = os.listdir("/home/tongzheng/data/")
|
||||||
wins = data["wins"]
|
|
||||||
ps = data["ps"]
|
|
||||||
print (boards.shape)
|
|
||||||
print (wins.shape)
|
|
||||||
print (ps.shape)
|
|
||||||
epochs = 100
|
epochs = 100
|
||||||
batch_size = 32
|
batch_size = 32
|
||||||
batch_num = boards.shape[0] // batch_size
|
|
||||||
result_path = "./results/"
|
result_path = "./results/"
|
||||||
with multi_gpu.create_session() as sess:
|
with multi_gpu.create_session() as sess:
|
||||||
sess.run(tf.global_variables_initializer())
|
sess.run(tf.global_variables_initializer())
|
||||||
@ -73,26 +69,36 @@ def train():
|
|||||||
print('Restoring model from {}...'.format(ckpt_file))
|
print('Restoring model from {}...'.format(ckpt_file))
|
||||||
saver.restore(sess, ckpt_file)
|
saver.restore(sess, ckpt_file)
|
||||||
for epoch in range(epochs):
|
for epoch in range(epochs):
|
||||||
time_train = -time.time()
|
for name in data_name:
|
||||||
index = np.arange(boards.shape[0])
|
data = np.load(data_path + name)
|
||||||
np.random.shuffle(index)
|
boards = data["boards"]
|
||||||
losses = []
|
wins = data["wins"]
|
||||||
regs = []
|
ps = data["ps"]
|
||||||
for iter in range(batch_num):
|
print (boards.shape)
|
||||||
_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
|
print (wins.shape)
|
||||||
z:wins[index[iter*batch_size:(iter+1)*batch_size]],
|
print (ps.shape)
|
||||||
pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
|
batch_num = boards.shape[0] // batch_size
|
||||||
is_training:True})
|
index = np.arange(boards.shape[0])
|
||||||
losses.append(l)
|
np.random.shuffle(index)
|
||||||
regs.append(r)
|
losses = []
|
||||||
if iter % 1 == 0:
|
regs = []
|
||||||
print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
|
time_train = -time.time()
|
||||||
time_train=-time.time()
|
for iter in range(batch_num):
|
||||||
losses = []
|
_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
|
||||||
regs = []
|
z:wins[index[iter*batch_size:(iter+1)*batch_size]],
|
||||||
if iter % 20 == 0:
|
pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
|
||||||
save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)
|
is_training:True})
|
||||||
saver.save(sess, result_path + save_path)
|
losses.append(l)
|
||||||
|
regs.append(r)
|
||||||
|
if iter % 1 == 0:
|
||||||
|
print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
|
||||||
|
time_train=-time.time()
|
||||||
|
losses = []
|
||||||
|
regs = []
|
||||||
|
if iter % 20 == 0:
|
||||||
|
save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
|
||||||
|
saver.save(sess, result_path + save_path)
|
||||||
|
del data, boards, wins, ps
|
||||||
|
|
||||||
def forward(board):
|
def forward(board):
|
||||||
result_path = "./results/"
|
result_path = "./results/"
|
||||||
@ -106,5 +112,5 @@ def forward(board):
|
|||||||
raise ValueError("No model loaded")
|
raise ValueError("No model loaded")
|
||||||
return sess.run([p,v], feed_dict={x:board})
|
return sess.run([p,v], feed_dict={x:board})
|
||||||
|
|
||||||
if __name__='main':
|
if __name__=="__main__":
|
||||||
train()
|
train()
|
||||||
|
128
AlphaGo/data.py
128
AlphaGo/data.py
@ -1,65 +1,83 @@
|
|||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
path = "/raid/tongzheng/AG/self_play_204/"
|
path = "/home/yama/leela-zero/data/npz-files/"
|
||||||
name = os.listdir(path)
|
name = os.listdir(path)
|
||||||
boards = np.zeros([0, 19, 19, 17])
|
print(len(name))
|
||||||
wins = np.zeros([0, 1])
|
thread_num = 17
|
||||||
ps = np.zeros([0, 362])
|
batch_num = len(name) // thread_num
|
||||||
|
|
||||||
for n in name:
|
def integrate(name, index):
|
||||||
data = np.load(path + n)
|
boards = np.zeros([0, 19, 19, 17])
|
||||||
board = data["boards"]
|
wins = np.zeros([0, 1])
|
||||||
win = data["win"]
|
ps = np.zeros([0, 362])
|
||||||
p = data["p"]
|
for n in name:
|
||||||
# board = np.zeros([0, 19, 19, 17])
|
data = np.load(path + n)
|
||||||
# win = np.zeros([0, 1])
|
board = data["boards"]
|
||||||
# p = np.zeros([0, 362])
|
win = data["win"]
|
||||||
# for i in range(data["boards"].shape[3]):
|
p = data["p"]
|
||||||
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
|
# board = np.zeros([0, 19, 19, 17])
|
||||||
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
|
# win = np.zeros([0, 1])
|
||||||
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
|
# p = np.zeros([0, 362])
|
||||||
boards = np.concatenate([boards, board], axis=0)
|
# for i in range(data["boards"].shape[3]):
|
||||||
wins = np.concatenate([wins, win], axis=0)
|
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
|
||||||
ps = np.concatenate([ps, p], axis=0)
|
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
|
||||||
print("Finish " + n)
|
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
|
||||||
|
boards = np.concatenate([boards, board], axis=0)
|
||||||
|
wins = np.concatenate([wins, win], axis=0)
|
||||||
|
ps = np.concatenate([ps, p], axis=0)
|
||||||
|
# print("Finish " + n)
|
||||||
|
print ("Integration {} Finished!".format(index))
|
||||||
|
board_ori = boards
|
||||||
|
win_ori = wins
|
||||||
|
p_ori = ps
|
||||||
|
for i in range(1, 3):
|
||||||
|
board = np.rot90(board_ori, i, (1, 2))
|
||||||
|
p = np.concatenate(
|
||||||
|
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||||
|
axis=1)
|
||||||
|
boards = np.concatenate([boards, board], axis=0)
|
||||||
|
wins = np.concatenate([wins, win_ori], axis=0)
|
||||||
|
ps = np.concatenate([ps, p], axis=0)
|
||||||
|
|
||||||
board_ori = boards
|
board = board_ori[:, ::-1]
|
||||||
win_ori = wins
|
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||||
p_ori = ps
|
axis=1)
|
||||||
for i in range(1, 3):
|
boards = np.concatenate([boards, board], axis=0)
|
||||||
board = np.rot90(board_ori, i, (1, 2))
|
wins = np.concatenate([wins, win_ori], axis=0)
|
||||||
p = np.concatenate(
|
ps = np.concatenate([ps, p], axis=0)
|
||||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
|
|
||||||
boards = np.concatenate([boards, board], axis=0)
|
|
||||||
wins = np.concatenate([wins, win_ori], axis=0)
|
|
||||||
ps = np.concatenate([ps, p], axis=0)
|
|
||||||
|
|
||||||
board = board_ori[:, ::-1]
|
board = board_ori[:, :, ::-1]
|
||||||
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
|
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||||
boards = np.concatenate([boards, board], axis=0)
|
axis=1)
|
||||||
wins = np.concatenate([wins, win_ori], axis=0)
|
boards = np.concatenate([boards, board], axis=0)
|
||||||
ps = np.concatenate([ps, p], axis=0)
|
wins = np.concatenate([wins, win_ori], axis=0)
|
||||||
|
ps = np.concatenate([ps, p], axis=0)
|
||||||
|
|
||||||
board = board_ori[:, :, ::-1]
|
board = board_ori[:, ::-1]
|
||||||
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
p = np.concatenate(
|
||||||
axis=1)
|
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||||
boards = np.concatenate([boards, board], axis=0)
|
axis=1)
|
||||||
wins = np.concatenate([wins, win_ori], axis=0)
|
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
|
||||||
ps = np.concatenate([ps, p], axis=0)
|
wins = np.concatenate([wins, win_ori], axis=0)
|
||||||
|
ps = np.concatenate([ps, p], axis=0)
|
||||||
|
|
||||||
board = board_ori[:, ::-1]
|
board = board_ori[:, :, ::-1]
|
||||||
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
|
p = np.concatenate(
|
||||||
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
|
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
|
||||||
wins = np.concatenate([wins, win_ori], axis=0)
|
p_ori[:, -1].reshape(-1, 1)],
|
||||||
ps = np.concatenate([ps, p], axis=0)
|
axis=1)
|
||||||
|
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
|
||||||
|
wins = np.concatenate([wins, win_ori], axis=0)
|
||||||
|
ps = np.concatenate([ps, p], axis=0)
|
||||||
|
|
||||||
board = board_ori[:, :, ::-1]
|
np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps)
|
||||||
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
print ("Thread {} has finished.".format(index))
|
||||||
axis=1)
|
thread_list = list()
|
||||||
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
|
for i in range(thread_num):
|
||||||
wins = np.concatenate([wins, win_ori], axis=0)
|
thread_list.append(threading.Thread(target=integrate, args=(name[batch_num * i:batch_num * (i + 1)], i,)))
|
||||||
ps = np.concatenate([ps, p], axis=0)
|
for thread in thread_list:
|
||||||
|
thread.start()
|
||||||
np.savez("data", boards=boards, wins=wins, ps=ps)
|
for thread in thread_list:
|
||||||
|
thread.join()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user