Minor fixes

This commit is contained in:
rtz19970824 2017-12-08 23:41:31 +08:00
parent 453e457452
commit bc49d466d1
4 changed files with 47 additions and 34 deletions

View File

@ -2,6 +2,7 @@ import os
import threading
import numpy as np
size = 9
path = "/home/yama/leela-zero/data/npz-files/"
name = os.listdir(path)
print(len(name))
@ -9,21 +10,21 @@ thread_num = 17
batch_num = len(name) // thread_num
def integrate(name, index):
boards = np.zeros([0, 19, 19, 17])
boards = np.zeros([0, size, size, 17])
wins = np.zeros([0, 1])
ps = np.zeros([0, 362])
ps = np.zeros([0, size**2 + 1])
for n in name:
data = np.load(path + n)
board = data["boards"]
win = data["win"]
p = data["p"]
# board = np.zeros([0, 19, 19, 17])
board = data["state"]
win = data["winner"]
p = data["prob"]
# board = np.zeros([0, size, size, 17])
# win = np.zeros([0, 1])
# p = np.zeros([0, 362])
# p = np.zeros([0, size**2 + 1])
# for i in range(data["boards"].shape[3]):
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, size, size, 17)], axis=0)
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
# p = np.concatenate([p, data["p"][:,i].reshape(-1, size**2 + 1)], axis=0)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win], axis=0)
ps = np.concatenate([ps, p], axis=0)
@ -35,21 +36,21 @@ def integrate(name, index):
for i in range(1, 3):
board = np.rot90(board_ori, i, (1, 2))
p = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
[np.rot90(p_ori[:, :-1].reshape(-1, size, size), i, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
@ -57,7 +58,7 @@ def integrate(name, index):
board = board_ori[:, ::-1]
p = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
@ -65,14 +66,14 @@ def integrate(name, index):
board = board_ori[:, :, ::-1]
p = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size**2),
p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps)
np.savez("/home/tongzheng/data/data-" + str(index), state=boards, winner=wins, prob=ps)
print ("Thread {} has finished.".format(index))
thread_list = list()
for i in range(thread_num):

View File

@ -203,10 +203,11 @@ class Network(object):
if __name__ == '__main__':
state = np.random.randint(0, 1, [1, 9, 9, 17])
state = np.random.randint(0, 1, [256, 9, 9, 17])
net = Network()
sess = net.forward()
start = time.time()
start_time = time.time()
for i in range(100):
sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False})
print("Step {}, Cumulative time {}".format(i, time.time() - start))
print("Step {}, use time {}".format(i, time.time() - start_time))
start_time = time.time()

View File

@ -2,15 +2,16 @@ import os
import numpy as np
import time
path = "/home/tongzheng/meta-data/"
save_path = "/home/tongzheng/data/"
size = 9
path = "/raid/tongzheng/tianshou/AlphaGo/data/part1/"
save_path = "/raid/tongzheng/tianshou/AlphaGo/data/"
name = os.listdir(path)
print(len(name))
batch_size = 128
batch_num = 512
block_size = batch_size * batch_num
slots_num = 32
slots_num = 16
class block(object):
@ -22,9 +23,9 @@ class block(object):
self.block_id = block_id
def concat(self, board, p, win):
board = board.reshape(-1, 19, 19, 17)
board = board.reshape(-1, size, size, 17)
win = win.reshape(-1, 1)
p = p.reshape(-1, 362)
p = p.reshape(-1, size ** 2 + 1)
self.boards.append(board)
self.wins.append(win)
self.ps.append(p)
@ -74,40 +75,40 @@ for n in name:
print("Shape {}".format(board.shape[0]))
start = -time.time()
for i in range(board.shape[0]):
board_ori = board[i].reshape(-1, 19, 19, 17)
board_ori = board[i].reshape(-1, size, size, 17)
win_ori = win[i].reshape(-1, 1)
p_ori = p[i].reshape(-1, 362)
p_ori = p[i].reshape(-1, size ** 2 + 1)
concat(block_list, board_ori, p_ori, win_ori)
for t in range(1, 4):
board_aug = np.rot90(board_ori, t, (1, 2))
p_aug = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
[np.rot90(p_ori[:, :-1].reshape(-1, size, size), t, (1, 2)).reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
concat(block_list, board_aug, p_aug, win_ori)
board_aug = board_ori[:, ::-1]
p_aug = np.concatenate(
[p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
[p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
concat(block_list, board_aug, p_aug, win_ori)
board_aug = board_ori[:, :, ::-1]
p_aug = np.concatenate(
[p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
[p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
axis=1)
concat(block_list, board_aug, p_aug, win_ori)
board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2))
p_aug = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361),
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size ** 2),
p_ori[:, -1].reshape(-1, 1)],
axis=1)
concat(block_list, board_aug, p_aug, win_ori)
board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2))
p_aug = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size ** 2),
p_ori[:, -1].reshape(-1, 1)],
axis=1)
concat(block_list, board_aug, p_aug, win_ori)

View File

@ -45,6 +45,7 @@ class UCTNode(MCTSNode):
self.W = np.zeros([action_num])
self.N = np.zeros([action_num])
self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1)
self.mask = None
def selection(self, simulator):
self.valid_mask(simulator)
@ -70,9 +71,15 @@ class UCTNode(MCTSNode):
self.parent.backpropagation(self.children[action].reward)
def valid_mask(self, simulator):
    """Force the UCB score of every invalid action to negative infinity.

    On the first call, each action index in ``range(self.action_num - 1)``
    is checked with ``simulator.is_valid(self.state, act)`` and the indices
    that fail are cached in ``self.mask``. Later calls reuse the cached
    list and do not query the simulator again.

    Args:
        simulator: object exposing ``is_valid(state, action) -> bool``.
    """
    if self.mask is None:
        # The last action index is deliberately left out of the scan
        # (presumably the "pass" move, which is always legal -- TODO confirm).
        # The list is built completely before it is stored, so an exception
        # raised by is_valid() cannot leave a partial mask cached.
        self.mask = [
            act
            for act in range(self.action_num - 1)
            if not simulator.is_valid(self.state, act)
        ]
    # Re-applied on every call, so the mask also holds if self.ucb has been
    # replaced since the last call; an empty mask makes this a no-op.
    self.ucb[self.mask] = -float("Inf")
class TSNode(MCTSNode):
@ -160,6 +167,10 @@ class MCTS(object):
self.max_time = max_time
if max_step is None and max_time is None:
raise ValueError("Need a stop criteria!")
self.select_time = []
self.evaluate_time = []
self.bp_time = []
while (max_step is not None and self.step < self.max_step or max_step is None) \
and (max_time is not None and time.time() - self.start_time < self.max_time or max_time is None):
self.expand()
@ -171,6 +182,5 @@ class MCTS(object):
value = node.children[new_action].expansion(self.evaluator, self.action_num)
node.children[new_action].backpropagation(value + 0.)
if __name__ == "__main__":
pass