diff --git a/AlphaGo/data.py b/AlphaGo/data.py index 4a75f54..464ebb9 100644 --- a/AlphaGo/data.py +++ b/AlphaGo/data.py @@ -2,6 +2,7 @@ import os import threading import numpy as np +size = 9 path = "/home/yama/leela-zero/data/npz-files/" name = os.listdir(path) print(len(name)) @@ -9,21 +10,21 @@ thread_num = 17 batch_num = len(name) // thread_num def integrate(name, index): - boards = np.zeros([0, 19, 19, 17]) + boards = np.zeros([0, size, size, 17]) wins = np.zeros([0, 1]) - ps = np.zeros([0, 362]) + ps = np.zeros([0, size**2 + 1]) for n in name: data = np.load(path + n) - board = data["boards"] - win = data["win"] - p = data["p"] - # board = np.zeros([0, 19, 19, 17]) + board = data["state"] + win = data["winner"] + p = data["prob"] + # board = np.zeros([0, size, size, 17]) # win = np.zeros([0, 1]) - # p = np.zeros([0, 362]) + # p = np.zeros([0, size**2 + 1]) # for i in range(data["boards"].shape[3]): - # board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0) + # board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, size, size, 17)], axis=0) # win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0) - # p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0) + # p = np.concatenate([p, data["p"][:,i].reshape(-1, size**2 + 1)], axis=0) boards = np.concatenate([boards, board], axis=0) wins = np.concatenate([wins, win], axis=0) ps = np.concatenate([ps, p], axis=0) @@ -35,21 +36,21 @@ def integrate(name, index): for i in range(1, 3): board = np.rot90(board_ori, i, (1, 2)) p = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + [np.rot90(p_ori[:, :-1].reshape(-1, size, size), i, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)], axis=1) boards = np.concatenate([boards, board], axis=0) wins = np.concatenate([wins, win_ori], axis=0) ps = np.concatenate([ps, p], axis=0) board = board_ori[:, ::-1] - p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)], axis=1) boards = np.concatenate([boards, board], axis=0) wins = np.concatenate([wins, win_ori], axis=0) ps = np.concatenate([ps, p], axis=0) board = board_ori[:, :, ::-1] - p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)], axis=1) boards = np.concatenate([boards, board], axis=0) wins = np.concatenate([wins, win_ori], axis=0) @@ -57,7 +58,7 @@ def integrate(name, index): board = board_ori[:, ::-1] p = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)], axis=1) boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0) wins = np.concatenate([wins, win_ori], axis=0) @@ -65,14 +66,14 @@ def integrate(name, index): board = board_ori[:, :, ::-1] p = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361), + [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)], axis=1) boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0) wins = np.concatenate([wins, win_ori], axis=0) ps = np.concatenate([ps, p], axis=0) - np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps) + np.savez("/home/tongzheng/data/data-" + str(index), state=boards, winner=wins, prob=ps) print ("Thread {} has finished.".format(index)) thread_list = list() for i in range(thread_num): diff --git a/AlphaGo/network_small.py b/AlphaGo/network_small.py index 096aea6..975cf96 100644 --- a/AlphaGo/network_small.py +++ b/AlphaGo/network_small.py @@ -203,10 +203,11 @@ class Network(object): if __name__ == '__main__': - state = np.random.randint(0, 1, [1, 9, 9, 17]) + state = np.random.randint(0, 1, [256, 9, 9, 17]) net = Network() sess = net.forward() - start = time.time() + start_time = time.time() for i in range(100): sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False}) - print("Step {}, Cumulative time {}".format(i, time.time() - start)) + print("Step {}, use time {}".format(i, time.time() - start_time)) + start_time = time.time() diff --git a/AlphaGo/random_data.py b/AlphaGo/random_data.py index b122e17..5b53bd6 100644 --- a/AlphaGo/random_data.py +++ b/AlphaGo/random_data.py @@ -2,15 +2,16 @@ import os import numpy as np import time -path = "/home/tongzheng/meta-data/" -save_path = "/home/tongzheng/data/" +size = 9 +path = "/raid/tongzheng/tianshou/AlphaGo/data/part1/" +save_path = "/raid/tongzheng/tianshou/AlphaGo/data/" name = os.listdir(path) print(len(name)) batch_size = 128 batch_num = 512 block_size = batch_size * batch_num -slots_num = 32 +slots_num = 16 class block(object): @@ -22,9 +23,9 @@ class block(object): self.block_id = block_id def concat(self, board, p, win): - board = board.reshape(-1, 19, 19, 17) + board = board.reshape(-1, size, size, 17) win = win.reshape(-1, 1) - p = p.reshape(-1, 362) + p = p.reshape(-1, size ** 2 + 1) self.boards.append(board) self.wins.append(win) self.ps.append(p) @@ -74,40 +75,40 @@ for n in name: print("Shape {}".format(board.shape[0])) start = -time.time() for i in range(board.shape[0]): - board_ori = board[i].reshape(-1, 19, 19, 17) + board_ori = board[i].reshape(-1, size, size, 17) win_ori = win[i].reshape(-1, 1) - p_ori = p[i].reshape(-1, 362) + p_ori = p[i].reshape(-1, size ** 2 + 1) concat(block_list, board_ori, p_ori, win_ori) for t in range(1, 4): board_aug = np.rot90(board_ori, t, (1, 2)) p_aug = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + [np.rot90(p_ori[:, :-1].reshape(-1, size, size), t, (1, 2)).reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)], axis=1) concat(block_list, board_aug, p_aug, win_ori) board_aug = board_ori[:, ::-1] p_aug = np.concatenate( - [p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + [p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)], axis=1) concat(block_list, board_aug, p_aug, win_ori) board_aug = board_ori[:, :, ::-1] p_aug = np.concatenate( - [p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + [p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)], axis=1) concat(block_list, board_aug, p_aug, win_ori) board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2)) p_aug = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), + [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)], axis=1) concat(block_list, board_aug, p_aug, win_ori) board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2)) p_aug = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361), + [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)], axis=1) concat(block_list, board_aug, p_aug, win_ori) diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py index db9a7cf..e29d919 100644 --- a/tianshou/core/mcts/mcts.py +++ b/tianshou/core/mcts/mcts.py @@ -45,6 +45,7 @@ class UCTNode(MCTSNode): self.W = np.zeros([action_num]) self.N = np.zeros([action_num]) self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1) + self.mask = None def selection(self, simulator): self.valid_mask(simulator) @@ -70,9 +71,15 @@ class UCTNode(MCTSNode): self.parent.backpropagation(self.children[action].reward) def valid_mask(self, simulator): - for act in range(self.action_num - 1): - if not simulator.is_valid(self.state, act): - self.ucb[act] = -float("Inf") + if self.mask is None: + start_time = time.time() + self.mask = [] + for act in range(self.action_num - 1): + if not simulator.is_valid(self.state, act): + self.mask.append(act) + self.ucb[act] = -float("Inf") + else: + self.ucb[self.mask] = -float("Inf") class TSNode(MCTSNode): @@ -160,6 +167,10 @@ class MCTS(object): self.max_time = max_time if max_step is None and max_time is None: raise ValueError("Need a stop criteria!") + + self.select_time = [] + self.evaluate_time = [] + self.bp_time = [] while (max_step is not None and self.step < self.max_step or max_step is None) \ and (max_time is not None and time.time() - self.start_time < self.max_time or max_time is None): self.expand() @@ -171,6 +182,5 @@ class MCTS(object): value = node.children[new_action].expansion(self.evaluator, self.action_num) node.children[new_action].backpropagation(value + 0.) - if __name__ == "__main__": pass