diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py
index 12e0a37..ac999b3 100644
--- a/AlphaGo/Network.py
+++ b/AlphaGo/Network.py
@@ -134,10 +134,18 @@ def forward(call_number):
checkpoint_path = "/home/jialian/stuGo/tianshou/stuGo/checkpoints/"
board_file = np.genfromtxt("/home/jialian/stuGo/tianshou/leela-zero/src/mcts_nn_files/board_" + call_number, dtype='str');
human_board = np.zeros((17, 19, 19))
+
#TODO : is it ok to ignore the last channel?
for i in range(17):
human_board[i] = np.array(list(board_file[i])).reshape(19, 19)
+ #print("============================")
+ #print("human board sum : " + str(np.sum(human_board[-1])))
+ #print("============================")
+ #print(human_board)
+ #print("============================")
+ #rint(human_board)
feed_board = human_board.transpose(1, 2, 0).reshape(1, 19, 19, 17)
+ #print(feed_board[:,:,:,-1])
#print(feed_board.shape)
#npz_board = np.load("/home/yama/rl/tianshou/AlphaGo/data/7f83928932f64a79bc1efdea268698ae.npz")
@@ -148,7 +156,7 @@ def forward(call_number):
#print("board shape : ", show_board.shape)
#print(show_board)
- itflag = True
+ itflag = False
with multi_gpu.create_session() as sess:
sess.run(tf.global_variables_initializer())
ckpt_file = tf.train.latest_checkpoint(checkpoint_path)
diff --git a/AlphaGo/Network_ori.py b/AlphaGo/Network_ori.py
new file mode 100644
index 0000000..fb851cf
--- /dev/null
+++ b/AlphaGo/Network_ori.py
@@ -0,0 +1,173 @@
+import os
+import time
+import gc
+
+import numpy as np
+import tensorflow as tf
+import tensorflow.contrib.layers as layers
+
+import multi_gpu
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+
+
+def residual_block(input, is_training):
+ normalizer_params = {'is_training': is_training,
+ 'updates_collections': tf.GraphKeys.UPDATE_OPS}
+ h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
+ normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+ weights_regularizer=layers.l2_regularizer(1e-4))
+ h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
+ normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+ weights_regularizer=layers.l2_regularizer(1e-4))
+ h = h + input
+ return tf.nn.relu(h)
+
+
+def policy_heads(input, is_training):
+ normalizer_params = {'is_training': is_training,
+ 'updates_collections': tf.GraphKeys.UPDATE_OPS}
+ h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
+ normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+ weights_regularizer=layers.l2_regularizer(1e-4))
+ h = layers.flatten(h)
+ h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
+ return h
+
+
+def value_heads(input, is_training):
+ normalizer_params = {'is_training': is_training,
+ 'updates_collections': tf.GraphKeys.UPDATE_OPS}
+ h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
+ normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+ weights_regularizer=layers.l2_regularizer(1e-4))
+ h = layers.flatten(h)
+ h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4))
+ h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
+ return h
+
+
+x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
+is_training = tf.placeholder(tf.bool, shape=[])
+z = tf.placeholder(tf.float32, shape=[None, 1])
+pi = tf.placeholder(tf.float32, shape=[None, 362])
+
+h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
+ normalizer_params={'is_training': is_training, 'updates_collections': tf.GraphKeys.UPDATE_OPS},
+ weights_regularizer=layers.l2_regularizer(1e-4))
+for i in range(19):
+ h = residual_block(h, is_training)
+v = value_heads(h, is_training)
+p = policy_heads(h, is_training)
+# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p)))))
+value_loss = tf.reduce_mean(tf.square(z - v))
+policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p))
+
+reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+total_loss = value_loss + policy_loss + reg
+# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss)
+update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+with tf.control_dependencies(update_ops):
+ train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
+var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
+
+
+def train():
+ data_path = "/home/tongzheng/data/"
+ data_name = os.listdir("/home/tongzheng/data/")
+ epochs = 100
+ batch_size = 128
+
+ result_path = "./checkpoints/"
+ with multi_gpu.create_session() as sess:
+ sess.run(tf.global_variables_initializer())
+ ckpt_file = tf.train.latest_checkpoint(result_path)
+ if ckpt_file is not None:
+ print('Restoring model from {}...'.format(ckpt_file))
+ saver.restore(sess, ckpt_file)
+ for epoch in range(epochs):
+ for name in data_name:
+ data = np.load(data_path + name)
+ boards = data["boards"]
+ wins = data["wins"]
+ ps = data["ps"]
+ print (boards.shape)
+ print (wins.shape)
+ print (ps.shape)
+ # batch_num = 1
+ batch_num = boards.shape[0] // batch_size
+ index = np.arange(boards.shape[0])
+ np.random.shuffle(index)
+ value_losses = []
+ policy_losses = []
+ regs = []
+ time_train = -time.time()
+ for iter in range(batch_num):
+ lv, lp, r, _ = sess.run([value_loss, policy_loss, reg, train_op],
+ feed_dict={x: boards[
+ index[iter * batch_size:(iter + 1) * batch_size]],
+ z: wins[index[
+ iter * batch_size:(iter + 1) * batch_size]],
+ pi: ps[index[
+ iter * batch_size:(iter + 1) * batch_size]],
+ is_training: True})
+ value_losses.append(lv)
+ policy_losses.append(lp)
+ regs.append(r)
+ del lv, lp, r
+ if iter % 1 == 0:
+ print(
+ "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(
+ epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)),
+ np.mean(np.array(policy_losses)), np.mean(np.array(regs))))
+ del value_losses, policy_losses, regs, time_train
+ time_train = -time.time()
+ value_losses = []
+ policy_losses = []
+ regs = []
+ if iter % 20 == 0:
+ save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
+ saver.save(sess, result_path + save_path)
+ del save_path
+ del data, boards, wins, ps, batch_num, index
+ gc.collect()
+def forward(board):
+ result_path = "./checkpoints"
+ itflag = False
+ res = None
+ if board is None:
+ # data = np.load("/home/tongzheng/meta-data/80b7bf21bce14862806d48c3cd760a1b.npz")
+ data = np.load("./data/7f83928932f64a79bc1efdea268698ae.npz")
+ board = data["boards"][50].reshape(-1, 19, 19, 17)
+ human_board = board[0].transpose(2, 0, 1)
+ print("============================")
+ print("human board sum : " + str(np.sum(human_board)))
+ print("============================")
+ print(board[:,:,:,-1])
+ itflag = False
+ with multi_gpu.create_session() as sess:
+ sess.run(tf.global_variables_initializer())
+ ckpt_file = tf.train.latest_checkpoint(result_path)
+ if ckpt_file is not None:
+ print('Restoring model from {}...'.format(ckpt_file))
+ saver.restore(sess, ckpt_file)
+ else:
+ raise ValueError("No model loaded")
+ res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag})
+ # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
+ # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
+ # print(np.argmax(res[0]))
+ print(res)
+ print(data["p"][0])
+ print(np.argmax(res[0]))
+ print(np.argmax(data["p"][0]))
+ # print(res[0].tolist()[0])
+ # print(np.argmax(res[0]))
+ return res
+
+
+if __name__ == '__main__':
+ # train()
+ # if sys.argv[1] == "test":
+ forward(None)
diff --git a/AlphaGo/README.md b/AlphaGo/README.md
new file mode 100644
index 0000000..d21b9bd
--- /dev/null
+++ b/AlphaGo/README.md
@@ -0,0 +1,12 @@
+# Reproduce AlphaGo Zero
+
+## Network.py
+A reimplemented version of pv network which is presented in the nature paper.
+
+## code-verify:
+
+Connecting our own policy-value neural network with leela-zero.
+
+## checkpoints:
+
+Weights of the policy-value neural network
diff --git a/AlphaGo/code-verify/Network.cpp b/AlphaGo/code-verify/Network.cpp
new file mode 100644
index 0000000..6d2b8be
--- /dev/null
+++ b/AlphaGo/code-verify/Network.cpp
@@ -0,0 +1,710 @@
+/*
+ This file is part of Leela Zero.
+ Copyright (C) 2017 Gian-Carlo Pascutto
+
+ Leela Zero is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Leela Zero is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Leela Zero. If not, see .
+*/
+
+#include "config.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+//#include
+#include
+
+#include "Im2Col.h"
+
+#ifdef __APPLE__
+#include
+#endif
+#ifdef USE_MKL
+#include
+#endif
+#ifdef USE_OPENBLAS
+
+#include
+
+#endif
+#ifdef USE_OPENCL
+
+#include "OpenCL.h"
+#include "UCTNode.h"
+
+#endif
+
+#include "SGFTree.h"
+#include "SGFParser.h"
+#include "Utils.h"
+#include "FastBoard.h"
+#include "Random.h"
+#include "Network.h"
+#include "GTP.h"
+#include "Utils.h"
+
+using namespace Utils;
+
+// Input + residual block tower
+std::vector> conv_weights;
+std::vector> conv_biases;
+std::vector> batchnorm_means;
+std::vector> batchnorm_variances;
+
+// Policy head
+std::vector conv_pol_w;
+std::vector conv_pol_b;
+std::array bn_pol_w1;
+std::array bn_pol_w2;
+
+std::array ip_pol_w;
+std::array ip_pol_b;
+
+// Value head
+std::vector conv_val_w;
+std::vector conv_val_b;
+std::array bn_val_w1;
+std::array bn_val_w2;
+
+std::array ip1_val_w;
+std::array ip1_val_b;
+
+std::array ip2_val_w;
+std::array ip2_val_b;
+
+void Network::benchmark(GameState *state) {
+ {
+ int BENCH_AMOUNT = 1600;
+ int cpus = cfg_num_threads;
+ int iters_per_thread = (BENCH_AMOUNT + (cpus - 1)) / cpus;
+
+ Time start;
+
+ ThreadGroup tg(thread_pool);
+ for (int i = 0; i < cpus; i++) {
+ tg.add_task([iters_per_thread, state]() {
+ GameState mystate = *state;
+ for (int loop = 0; loop < iters_per_thread; loop++) {
+ auto vec = get_scored_moves(&mystate, Ensemble::RANDOM_ROTATION);
+ }
+ });
+ };
+ tg.wait_all();
+
+ Time end;
+
+ myprintf("%5d evaluations in %5.2f seconds -> %d n/s\n",
+ BENCH_AMOUNT,
+ (float) Time::timediff(start, end) / 100.0,
+ (int) ((float) BENCH_AMOUNT / ((float) Time::timediff(start, end) / 100.0)));
+ }
+}
+
+void Network::initialize(void) {
+#ifdef USE_OPENCL
+ myprintf("Initializing OpenCL\n");
+ opencl.initialize();
+
+ // Count size of the network
+ myprintf("Detecting residual layers...");
+ std::ifstream wtfile(cfg_weightsfile);
+ if (wtfile.fail()) {
+ myprintf("Could not open weights file: %s\n", cfg_weightsfile.c_str());
+ exit(EXIT_FAILURE);
+ }
+ std::string line;
+ auto linecount = size_t{0};
+ while (std::getline(wtfile, line)) {
+ // Second line of parameters are the convolution layer biases,
+ // so this tells us the amount of channels in the residual layers.
+ // (Provided they're all equally large - that's not actually required!)
+ if (linecount == 1) {
+ std::stringstream ss(line);
+ auto count = std::distance(std::istream_iterator(ss),
+ std::istream_iterator());
+ myprintf("%d channels...", count);
+ }
+ linecount++;
+ }
+ // 1 input layer (4 x weights), 14 ending weights, the rest are residuals
+ // every residual has 8 x weight lines
+ auto residual_layers = linecount - (4 + 14);
+ if (residual_layers % 8 != 0) {
+ myprintf("\nInconsistent number of weights in the file.\n");
+ exit(EXIT_FAILURE);
+ }
+ residual_layers /= 8;
+ myprintf("%d layers\nTransferring weights to GPU...", residual_layers);
+
+ // Re-read file and process
+ wtfile.clear();
+ wtfile.seekg(0, std::ios::beg);
+
+ auto plain_conv_layers = 1 + (residual_layers * 2);
+ auto plain_conv_wts = plain_conv_layers * 4;
+ linecount = 0;
+ while (std::getline(wtfile, line)) {
+ std::vector weights;
+ float weight;
+ std::istringstream iss(line);
+ while (iss >> weight) {
+ weights.emplace_back(weight);
+ }
+ if (linecount < plain_conv_wts) {
+ if (linecount % 4 == 0) {
+ conv_weights.emplace_back(weights);
+ } else if (linecount % 4 == 1) {
+ conv_biases.emplace_back(weights);
+ } else if (linecount % 4 == 2) {
+ batchnorm_means.emplace_back(weights);
+ } else if (linecount % 4 == 3) {
+ batchnorm_variances.emplace_back(weights);
+ }
+ } else if (linecount == plain_conv_wts) {
+ conv_pol_w = std::move(weights);
+ } else if (linecount == plain_conv_wts + 1) {
+ conv_pol_b = std::move(weights);
+ } else if (linecount == plain_conv_wts + 2) {
+ std::copy(begin(weights), end(weights), begin(bn_pol_w1));
+ } else if (linecount == plain_conv_wts + 3) {
+ std::copy(begin(weights), end(weights), begin(bn_pol_w2));
+ } else if (linecount == plain_conv_wts + 4) {
+ std::copy(begin(weights), end(weights), begin(ip_pol_w));
+ } else if (linecount == plain_conv_wts + 5) {
+ std::copy(begin(weights), end(weights), begin(ip_pol_b));
+ } else if (linecount == plain_conv_wts + 6) {
+ conv_val_w = std::move(weights);
+ } else if (linecount == plain_conv_wts + 7) {
+ conv_val_b = std::move(weights);
+ } else if (linecount == plain_conv_wts + 8) {
+ std::copy(begin(weights), end(weights), begin(bn_val_w1));
+ } else if (linecount == plain_conv_wts + 9) {
+ std::copy(begin(weights), end(weights), begin(bn_val_w2));
+ } else if (linecount == plain_conv_wts + 10) {
+ std::copy(begin(weights), end(weights), begin(ip1_val_w));
+ } else if (linecount == plain_conv_wts + 11) {
+ std::copy(begin(weights), end(weights), begin(ip1_val_b));
+ } else if (linecount == plain_conv_wts + 12) {
+ std::copy(begin(weights), end(weights), begin(ip2_val_w));
+ } else if (linecount == plain_conv_wts + 13) {
+ std::copy(begin(weights), end(weights), begin(ip2_val_b));
+ }
+ linecount++;
+ }
+ wtfile.close();
+
+ // input
+ size_t weight_index = 0;
+ opencl_net.push_convolve(3, conv_weights[weight_index],
+ conv_biases[weight_index]);
+ opencl_net.push_batchnorm(361, batchnorm_means[weight_index],
+ batchnorm_variances[weight_index]);
+ weight_index++;
+
+ // residual blocks
+ for (auto i = size_t{0}; i < residual_layers; i++) {
+ opencl_net.push_residual(3, conv_weights[weight_index],
+ conv_biases[weight_index],
+ batchnorm_means[weight_index],
+ batchnorm_variances[weight_index],
+ conv_weights[weight_index + 1],
+ conv_biases[weight_index + 1],
+ batchnorm_means[weight_index + 1],
+ batchnorm_variances[weight_index + 1]);
+ weight_index += 2;
+ }
+ myprintf("done\n");
+#endif
+#ifdef USE_BLAS
+#ifndef __APPLE__
+#ifdef USE_OPENBLAS
+ openblas_set_num_threads(1);
+ myprintf("BLAS Core: %s\n", openblas_get_corename());
+#endif
+#ifdef USE_MKL
+ //mkl_set_threading_layer(MKL_THREADING_SEQUENTIAL);
+ mkl_set_num_threads(1);
+ MKLVersion Version;
+ mkl_get_version(&Version);
+ myprintf("BLAS core: MKL %s\n", Version.Processor);
+#endif
+#endif
+#endif
+}
+
+#ifdef USE_BLAS
+
+template
+void convolve(const std::vector &input,
+ const std::vector &weights,
+ const std::vector &biases,
+ std::vector &output) {
+ // fixed for 19x19
+ constexpr unsigned int width = 19;
+ constexpr unsigned int height = 19;
+ constexpr unsigned int spatial_out = width * height;
+ constexpr unsigned int filter_len = filter_size * filter_size;
+
+ auto channels = int(weights.size() / (biases.size() * filter_len));
+ unsigned int filter_dim = filter_len * channels;
+
+ std::vector col(filter_dim * width * height);
+ im2col(channels, input, col);
+
+ // Weight shape (output, input, filter_size, filter_size)
+ // 96 22 5 5
+ // outputs[96,19x19] = weights[96,22x9] x col[22x9,19x19]
+ // C←αAB + βC
+ // M Number of rows in matrices A and C.
+ // N Number of columns in matrices B and C.
+ // K Number of columns in matrix A; number of rows in matrix B.
+ // lda The size of the first dimention of matrix A; if you are
+ // passing a matrix A[m][n], the value should be m.
+ // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
+ // ldb, beta, C, N);
+
+ cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+ // M N K
+ outputs, spatial_out, filter_dim,
+ 1.0f, &weights[0], filter_dim,
+ &col[0], spatial_out,
+ 0.0f, &output[0], spatial_out);
+
+ for (unsigned int o = 0; o < outputs; o++) {
+ for (unsigned int b = 0; b < spatial_out; b++) {
+ output[(o * spatial_out) + b] =
+ biases[o] + output[(o * spatial_out) + b];
+ }
+ }
+}
+
+template
+void innerproduct(const std::vector &input,
+ const std::array &weights,
+ const std::array &biases,
+ std::vector &output) {
+ assert(B == outputs);
+
+ cblas_sgemv(CblasRowMajor, CblasNoTrans,
+ // M K
+ outputs, inputs,
+ 1.0f, &weights[0], inputs,
+ &input[0], 1,
+ 0.0f, &output[0], 1);
+
+ auto lambda_ReLU = [](float val) {
+ return (val > 0.0f) ?
+ val : 0.0f;
+ };
+
+ for (unsigned int o = 0; o < outputs; o++) {
+ float val = biases[o] + output[o];
+ if (outputs == 256) {
+ val = lambda_ReLU(val);
+ }
+ output[o] = val;
+ }
+}
+
+template
+void batchnorm(const std::vector &input,
+ const std::array &means,
+ const std::array &variances,
+ std::vector &output) {
+ constexpr float epsilon = 1e-5f;
+
+ auto lambda_ReLU = [](float val) {
+ return (val > 0.0f) ?
+ val : 0.0f;
+ };
+
+ for (unsigned int c = 0; c < channels; ++c) {
+ float mean = means[c];
+ float variance = variances[c] + epsilon;
+ float scale_stddiv = 1.0f / std::sqrt(variance);
+
+ float *out = &output[c * spatial_size];
+ float const *in = &input[c * spatial_size];
+ for (unsigned int b = 0; b < spatial_size; b++) {
+ out[b] = lambda_ReLU(scale_stddiv * (in[b] - mean));
+ }
+ }
+}
+
+#endif
+
+void Network::softmax(const std::vector &input,
+ std::vector &output,
+ float temperature) {
+ assert(&input != &output);
+
+ float alpha = *std::max_element(input.begin(),
+ input.begin() + output.size());
+ alpha /= temperature;
+
+ float denom = 0.0f;
+ std::vector helper(output.size());
+ for (size_t i = 0; i < output.size(); i++) {
+ float val = std::exp((input[i] / temperature) - alpha);
+ helper[i] = val;
+ denom += val;
+ }
+ for (size_t i = 0; i < output.size(); i++) {
+ output[i] = helper[i] / denom;
+ }
+}
+
+/* magic code, only execute once, what is the mechanism?
+void function() {
+ static const auto runOnce = [] (auto content) { std::cout << content << std::endl; return true;};
+}
+ */
+
+std::string exec(const char* cmd) {
+ std::array buffer;
+ std::string result;
+ std::shared_ptr pipe(popen(cmd, "r"), pclose);
+ if (!pipe) throw std::runtime_error("popen() failed!");
+ while (!feof(pipe.get())) {
+ if (fgets(buffer.data(), 128, pipe.get()) != nullptr)
+ result += buffer.data();
+ }
+ return result;
+}
+
+Network::Netresult Network::get_scored_moves(
+ GameState *state, Ensemble ensemble, int rotation) {
+ Netresult result;
+ if (state->board.get_boardsize() != 19) {
+ return result;
+ }
+
+ NNPlanes planes;
+ gather_features(state, planes);
+
+ /*
+ * tianshou_code
+ * writing the board information to local file
+ */
+ //static std::once_flag plane_size_flag;
+ //std::call_once ( plane_size_flag, [&]{ printf("Network::get_scored_moves planses size : %d\n", planes.size());} );
+ //std::call_once ( plane_content_flag, [&]{
+ /*
+ for (int i = 0; i < board_size; ++i) {
+ for (int j = 0; j < board_size; ++j) {
+ std::cout << planes[k][i * board_size + j] << " ";
+ }
+ printf("\n");
+ }
+ printf("======================================\n");
+ */
+ // } );
+ static int call_number = 0;
+ call_number++;
+ std::ofstream mctsnn_file;
+ mctsnn_file.open("/home/yama/rl/tianshou/leela-zero/src/mcts_nn_files/board_" + std::to_string(call_number));
+ const int total_count = board_size * board_size;
+ for (int k = 0; k < planes.size(); ++k) {
+ for (int i = 0; i < total_count; ++i) {
+ mctsnn_file << planes[k][i];
+ }
+ mctsnn_file << '\n';
+ }
+ mctsnn_file.close();
+
+ const int extended_board_size = board_size + 2;
+ Network::Netresult nn_res;
+ std::string cmd = "python /home/yama/rl/tianshou/AlphaGo/Network.py " + std::to_string(call_number);
+ std::string res = exec(cmd.c_str());
+ //std::cout << res << std::endl;
+ std::string buf; // Have a buffer string
+ std::stringstream ss(res); // Insert the string into a stream
+ const int policy_size = board_size * board_size + 1;
+ int idx = 0;
+ for (int i = 0; i < extended_board_size; ++i) {
+ for (int j = 0; j < extended_board_size; ++j) {
+ if ((0 < i) && (i < board_size + 1) && (0 < j) && (j < board_size + 1)) {
+ ss >> buf;
+ nn_res.first.emplace_back(std::make_pair(std::stod(buf), idx));
+ //std::cout << std::fixed << "[" << std::stod(buf) << "," << idx << "]\n";
+ }
+ idx++;
+ }
+ }
+ // probability of pass
+ ss >> buf;
+ nn_res.first.emplace_back(std::make_pair(std::stod(buf), -1));
+ std::cout << "tianshou nn output : \t\n";
+ auto max_iterator = std::max_element(nn_res.first.begin(), nn_res.first.end());
+ int argmax = std::distance(nn_res.first.begin(), max_iterator);
+ int line = (argmax / board_size) + 1, column = (argmax % board_size) + 1;
+ std::cout << "\tmove : " << argmax << " [" << line << "," << column << "]" << std::endl;
+ // evaluation of state value
+ ss >> buf;
+ nn_res.second = std::stod(buf);
+ std::cout << "\tvalue : " << nn_res.second << std::endl;
+
+ if (ensemble == DIRECT) {
+ assert(rotation >= 0 && rotation <= 7);
+ result = get_scored_moves_internal(state, planes, rotation);
+ } else {
+ assert(ensemble == RANDOM_ROTATION);
+ assert(rotation == -1);
+ int rand_rot = Random::get_Rng()->randfix<8>();
+ std::cout << "rotation : " << rand_rot << std::endl;
+ result = get_scored_moves_internal(state, planes, rand_rot);
+ }
+
+ /*
+ static std::once_flag of;
+ std::call_once(of, [&] { } );
+ for (auto ele: result.first) {
+ std::cout << std::fixed << "[" << ele.first << "," << ele.second << "]\n";
+ }
+ */
+ std::cout << "leela nn output : \t\n";
+ max_iterator = std::max_element(result.first.begin(), result.first.end());
+
+ argmax = std::distance(result.first.begin(), max_iterator);
+ line = (argmax / board_size) + 1, column = (argmax % board_size) + 1;
+ std::cout << "\tmove : " << argmax << " [" << line << "," << column << "]" << std::endl;
+ std::cout << "\tvalue : " << result.second << std::endl;
+
+ return nn_res;
+ //return result;
+}
+
+Network::Netresult Network::get_scored_moves_internal(
+ GameState *state, NNPlanes &planes, int rotation) {
+ assert(rotation >= 0 && rotation <= 7);
+ constexpr int channels = INPUT_CHANNELS;
+ assert(channels == planes.size());
+ constexpr int width = 19;
+ constexpr int height = 19;
+ constexpr int max_channels = MAX_CHANNELS;
+ std::vector input_data(max_channels * width * height);
+ std::vector output_data(max_channels * width * height);
+ std::vector policy_data_1(2 * width * height);
+ std::vector policy_data_2(2 * width * height);
+ std::vector value_data_1(1 * width * height);
+ std::vector value_data_2(1 * width * height);
+ std::vector policy_out((width * height) + 1);
+ std::vector softmax_data((width * height) + 1);
+ std::vector winrate_data(256);
+ std::vector winrate_out(1);
+ for (int c = 0; c < channels; ++c) {
+ for (int h = 0; h < height; ++h) {
+ for (int w = 0; w < width; ++w) {
+ int vtx = rotate_nn_idx(h * 19 + w, rotation);
+ input_data[(c * height + h) * width + w] =
+ (float) planes[c][vtx];
+ }
+ }
+ }
+#ifdef USE_OPENCL
+ opencl_net.forward(input_data, output_data);
+ // Get the moves
+ convolve<1, 2>(output_data, conv_pol_w, conv_pol_b, policy_data_1);
+ batchnorm<2, 361>(policy_data_1, bn_pol_w1, bn_pol_w2, policy_data_2);
+ innerproduct<2 * 361, 362>(policy_data_2, ip_pol_w, ip_pol_b, policy_out);
+ softmax(policy_out, softmax_data, cfg_softmax_temp);
+ std::vector &outputs = softmax_data;
+
+ // Now get the score
+ convolve<1, 1>(output_data, conv_val_w, conv_val_b, value_data_1);
+ batchnorm<1, 361>(value_data_1, bn_val_w1, bn_val_w2, value_data_2);
+ innerproduct<361, 256>(value_data_2, ip1_val_w, ip1_val_b, winrate_data);
+ innerproduct<256, 1>(winrate_data, ip2_val_w, ip2_val_b, winrate_out);
+
+ // Sigmoid
+ float winrate_sig = (1.0f + std::tanh(winrate_out[0])) / 2.0f;
+#elif defined(USE_BLAS) && !defined(USE_OPENCL)
+#error "Not implemented"
+ // Not implemented yet - not very useful unless you have some
+ // sort of Xeon Phi
+ softmax(output_data, softmax_data, cfg_softmax_temp);
+ // Move scores
+ std::vector& outputs = softmax_data;
+#endif
+ std::vector result;
+ for (size_t idx = 0; idx < outputs.size(); idx++) {
+ if (idx < 19 * 19) {
+ auto rot_idx = rev_rotate_nn_idx(idx, rotation);
+ auto val = outputs[rot_idx];
+ int x = idx % 19;
+ int y = idx / 19;
+ int vtx = state->board.get_vertex(x, y);
+ if (state->board.get_square(vtx) == FastBoard::EMPTY) {
+ result.emplace_back(val, vtx);
+ }
+ } else {
+ result.emplace_back(outputs[idx], FastBoard::PASS);
+ }
+ }
+
+ return std::make_pair(result, winrate_sig);
+}
+
+void Network::show_heatmap(FastState *state, Netresult &result, bool topmoves) {
+ auto moves = result.first;
+ std::vector display_map;
+ std::string line;
+
+ for (unsigned int y = 0; y < 19; y++) {
+ for (unsigned int x = 0; x < 19; x++) {
+ int vtx = state->board.get_vertex(x, y);
+
+ auto item = std::find_if(moves.cbegin(), moves.cend(),
+ [&vtx](scored_node const &item) {
+ return item.second == vtx;
+ });
+
+ float score = 0.0f;
+ // Non-empty squares won't be scored
+ if (item != moves.end()) {
+ score = item->first;
+ assert(vtx == item->second);
+ }
+
+ line += boost::str(boost::format("%3d ") % int(score * 1000));
+ if (x == 18) {
+ display_map.push_back(line);
+ line.clear();
+ }
+ }
+ }
+
+ for (int i = display_map.size() - 1; i >= 0; --i) {
+ myprintf("%s\n", display_map[i].c_str());
+ }
+ assert(result.first.back().second == FastBoard::PASS);
+ int pass_score = int(result.first.back().first * 1000);
+ myprintf("pass: %d\n", pass_score);
+ myprintf("winrate: %f\n", result.second);
+
+ if (topmoves) {
+ std::stable_sort(moves.rbegin(), moves.rend());
+
+ float cum = 0.0f;
+ size_t tried = 0;
+ while (cum < 0.85f && tried < moves.size()) {
+ if (moves[tried].first < 0.01f) break;
+ myprintf("%1.3f (%s)\n",
+ moves[tried].first,
+ state->board.move_to_text(moves[tried].second).c_str());
+ cum += moves[tried].first;
+ tried++;
+ }
+ }
+}
+
+void Network::gather_features(GameState *state, NNPlanes &planes) {
+ planes.resize(18);
+ const size_t our_offset = 0;
+ const size_t their_offset = 8;
+ BoardPlane &black_to_move = planes[16];
+ BoardPlane &white_to_move = planes[17];
+
+ bool whites_move = state->get_to_move() == FastBoard::WHITE;
+ // tianshou_code
+ //std::cout << "whites_move : " << whites_move << std::endl;
+ if (whites_move) {
+ white_to_move.set();
+ } else {
+ black_to_move.set();
+ }
+
+ // Go back in time, fill history boards
+ size_t backtracks = 0;
+ for (int h = 0; h < 8; h++) {
+ int tomove = state->get_to_move();
+ // collect white, black occupation planes
+ for (int j = 0; j < 19; j++) {
+ for (int i = 0; i < 19; i++) {
+ int vtx = state->board.get_vertex(i, j);
+ FastBoard::square_t color =
+ state->board.get_square(vtx);
+ int idx = j * 19 + i;
+ if (color != FastBoard::EMPTY) {
+ if (color == tomove) {
+ planes[our_offset + h][idx] = true;
+ } else {
+ planes[their_offset + h][idx] = true;
+ }
+ }
+ }
+ }
+ if (!state->undo_move()) {
+ break;
+ } else {
+ backtracks++;
+ }
+ }
+
+ // Now go back to present day
+ for (size_t h = 0; h < backtracks; h++) {
+ state->forward_move();
+ }
+}
+
+int Network::rev_rotate_nn_idx(const int vertex, int symmetry) {
+ static const int invert[] = {0, 1, 2, 3, 4, 6, 5, 7};
+ assert(rotate_nn_idx(rotate_nn_idx(vertex, symmetry), invert[symmetry])
+ == vertex);
+ return rotate_nn_idx(vertex, invert[symmetry]);
+}
+
+int Network::rotate_nn_idx(const int vertex, int symmetry) {
+ assert(vertex >= 0 && vertex < 19 * 19);
+ assert(symmetry >= 0 && symmetry < 8);
+ int x = vertex % 19;
+ int y = vertex / 19;
+ int newx;
+ int newy;
+
+ if (symmetry >= 4) {
+ std::swap(x, y);
+ symmetry -= 4;
+ }
+
+ if (symmetry == 0) {
+ newx = x;
+ newy = y;
+ } else if (symmetry == 1) {
+ newx = x;
+ newy = 19 - y - 1;
+ } else if (symmetry == 2) {
+ newx = 19 - x - 1;
+ newy = y;
+ } else {
+ assert(symmetry == 3);
+ newx = 19 - x - 1;
+ newy = 19 - y - 1;
+ }
+
+ int newvtx = (newy * 19) + newx;
+ assert(newvtx >= 0 && newvtx < 19 * 19);
+ return newvtx;
+}
diff --git a/AlphaGo/code-verify/Network.h b/AlphaGo/code-verify/Network.h
new file mode 100644
index 0000000..82e3632
--- /dev/null
+++ b/AlphaGo/code-verify/Network.h
@@ -0,0 +1,73 @@
+/*
+ This file is part of Leela Zero.
+ Copyright (C) 2017 Gian-Carlo Pascutto
+
+ Leela Zero is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Leela Zero is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Leela Zero. If not, see .
+*/
+
+#ifndef NETWORK_H_INCLUDED
+#define NETWORK_H_INCLUDED
+
+#include "config.h"
+#include
+#include
+#include
+#include
+#include
+
+#ifdef USE_OPENCL
+#include
+class UCTNode;
+#endif
+
+#include "FastState.h"
+#include "GameState.h"
+
+class Network {
+public:
+ enum Ensemble {
+ DIRECT, RANDOM_ROTATION
+ };
+ static const int board_size = 19;
+ using BoardPlane = std::bitset;
+ using NNPlanes = std::vector;
+ using scored_node = std::pair;
+ using Netresult = std::pair, float>;
+
+ static Netresult get_scored_moves(GameState * state,
+ Ensemble ensemble,
+ int rotation = -1);
+ static constexpr int INPUT_CHANNELS = 18;
+ static constexpr int MAX_CHANNELS = 256;
+
+ static void initialize();
+ static void benchmark(GameState * state);
+ static void show_heatmap(FastState * state, Netresult & netres, bool topmoves);
+ static void softmax(const std::vector& input,
+ std::vector& output,
+ float temperature = 1.0f);
+ // tianshou_code
+ static void show_once(std::string hash_key) {
+ printf("%s\n", hash_key.c_str());
+ }
+
+private:
+ static Netresult get_scored_moves_internal(
+ GameState * state, NNPlanes & planes, int rotation);
+ static void gather_features(GameState * state, NNPlanes & planes);
+ static int rotate_nn_idx(const int vertex, int symmetry);
+ static int rev_rotate_nn_idx(const int vertex, int symmetry);
+};
+
+#endif
diff --git a/tianshou/__init__.py b/tianshou/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tianshou/core/__init__.py b/tianshou/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tianshou/core/global_config.json b/tianshou/core/global_config.json
new file mode 100644
index 0000000..19d0ca3
--- /dev/null
+++ b/tianshou/core/global_config.json
@@ -0,0 +1,5 @@
+{
+ "global_description": "read by Environment, Neural Network, and MCTS",
+ "state_space": " ",
+ "action_space": " "
+}
diff --git a/tianshou/core/mcts/__init__.py b/tianshou/core/mcts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tianshou/core/mcts/evaluator.py b/tianshou/core/mcts/evaluator.py
new file mode 100644
index 0000000..85041be
--- /dev/null
+++ b/tianshou/core/mcts/evaluator.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+class evaluator(object):
+ def __init__(self, env, action_num):
+ self.env = env
+ self.action_num = action_num
+
+ def __call__(self, state):
+ raise NotImplementedError("Need to implement the evaluator")
+
+class rollout_policy(evaluator):
+ def __init__(self, env, action_num):
+ super(rollout_policy, self).__init__(env, action_num)
+ self.is_terminated = False
+
+ def __call__(self, state):
+ # TODO: prior for rollout policy
+ while not self.is_terminated:
+ action = np.random.randint(0,self.action_num)
+ state, is_terminated = self.env.step_forward(state, action)
\ No newline at end of file
diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py
new file mode 100644
index 0000000..521b455
--- /dev/null
+++ b/tianshou/core/mcts/mcts.py
@@ -0,0 +1,131 @@
+import numpy as np
+import math
+import time
+
+c_puct = 1
+
+
+class MCTSNode(object):
+ def __init__(self, parent, action, state, action_num, prior):
+ self.parent = parent
+ self.action = action
+ self.children = {}
+ self.state = state
+ self.action_num = action_num
+ self.prior = prior
+
+ def selection(self):
+ raise NotImplementedError("Need to implement function selection")
+
+ def backpropagation(self, action, value):
+ raise NotImplementedError("Need to implement function backpropagation")
+
+ def expansion(self, simulator, action):
+ raise NotImplementedError("Need to implement function expansion")
+
+ def simulation(self, state, evaluator):
+ raise NotImplementedError("Need to implement function simulation")
+
+
+class UCTNode(MCTSNode):
+ def __init__(self, parent, action, state, action_num, prior):
+ super(UCTNode, self).__init__(parent, action, state, action_num, prior)
+ self.Q = np.zeros([action_num])
+ self.W = np.zeros([action_num])
+ self.N = np.zeros([action_num])
+ self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1)
+ self.is_terminated = False
+
+ def selection(self):
+ if not self.is_terminated:
+ action = np.argmax(self.ucb)
+ if action in self.children.keys():
+ node, action = self.children[action].selection()
+ else:
+ node = self
+ else:
+ action = None
+ node = self
+ return node, action
+
+ def backpropagation(self, action, value):
+ if action is None:
+ if self.parent is not None:
+ self.parent.backpropagation(self.action, value)
+ else:
+ self.N[action] += 1
+ self.W[action] += value
+ for i in range(self.action_num):
+ if self.N[i] != 0:
+ self.Q[i] = (self.W[i] + 0.)/self.N[i]
+ self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1.)
+ if self.parent is not None:
+ self.parent.backpropagation(self.action, value)
+
+ def expansion(self, simulator, action):
+ next_state, is_terminated = simulator.step_forward(self.state, action)
+ # TODO: Let users/evaluator give the prior
+ prior = np.ones([self.action_num]) / self.action_num
+ self.children[action] = UCTNode(self, action, next_state, self.action_num, prior)
+ self.children[action].is_terminated = is_terminated
+
+ def simulation(self, evaluator, state):
+ value = evaluator(state)
+ return value
+
+
+class TSNode(MCTSNode):
+ def __init__(self, parent, action, state, action_num, prior, method="Gaussian"):
+ super(TSNode, self).__init__(parent, action, state, action_num, prior)
+ if method == "Beta":
+ self.alpha = np.ones([action_num])
+ self.beta = np.ones([action_num])
+ if method == "Gaussian":
+ self.mu = np.zeros([action_num])
+ self.sigma = np.zeros([action_num])
+
+
+class ActionNode:
+ def __init__(self, parent, action):
+ self.parent = parent
+ self.action = action
+ self.children = {}
+ self.value = {}
+
+
+class MCTS:
+ def __init__(self, simulator, evaluator, root, action_num, prior, method="UCT", max_step=None, max_time=None):
+ self.simulator = simulator
+ self.evaluator = evaluator
+ if method == "UCT":
+ self.root = UCTNode(None, None, root, action_num, prior)
+ if method == "TS":
+ self.root = TSNode(None, None, root, action_num, prior)
+ if max_step is not None:
+ self.step = 0
+ self.max_step = max_step
+ if max_time is not None:
+ self.start_time = time.time()
+ self.max_time = max_time
+ if max_step is None and max_time is None:
+ raise ValueError("Need a stop criteria!")
+ while (max_step is not None and self.step < self.max_step or max_step is None) \
+ and (max_time is not None and time.time() - self.start_time < self.max_time or max_time is None):
+ print(self.root.Q)
+ self.expand()
+ if max_step is not None:
+ self.step += 1
+
+ def expand(self):
+ node, new_action = self.root.selection()
+ if new_action is None:
+ value = node.simulation(self.evaluator, node.state)
+ node.backpropagation(new_action, value)
+ else:
+ node.expansion(self.simulator, new_action)
+ value = node.simulation(self.evaluator, node.children[new_action].state)
+ node.backpropagation(new_action, value)
+
+
+if __name__=="__main__":
+ pass
\ No newline at end of file
diff --git a/tianshou/core/mcts/mcts_test.py b/tianshou/core/mcts/mcts_test.py
new file mode 100644
index 0000000..f708b39
--- /dev/null
+++ b/tianshou/core/mcts/mcts_test.py
@@ -0,0 +1,34 @@
+import numpy as np
+from mcts import MCTS
+import matplotlib.pyplot as plt
+
+class TestEnv:
+ def __init__(self, max_step=5):
+ self.max_step = max_step
+ self.reward = {i:np.random.uniform() for i in range(2**max_step)}
+ # self.reward = {0:0.8, 1:0.2, 2:0.4, 3:0.6}
+ self.best = max(self.reward.items(), key=lambda x:x[1])
+ # print("The best arm is {} with expected reward {}".format(self.best[0],self.best[1]))
+ print(self.reward)
+
+ def step_forward(self, state, action):
+ if action != 0 and action != 1:
+ raise ValueError("Action must be 0 or 1! Your action is {}".format(action))
+ if state[0] >= 2**state[1] or state[1] >= self.max_step:
+ raise ValueError("Invalid State! Your state is {}".format(state))
+ # print("Operate action {} at state {}, timestep {}".format(action, state[0], state[1]))
+ new_state = [0,0]
+ new_state[0] = state[0] + 2**state[1]*action
+ new_state[1] = state[1] + 1
+ if new_state[1] == self.max_step:
+ reward = int(np.random.uniform() < self.reward[state[0]])
+ is_terminated = True
+ else:
+ reward = 0
+ is_terminated = False
+ return new_state, reward, is_terminated
+
+if __name__=="__main__":
+ env = TestEnv(3)
+ evaluator = lambda state: env.step_forward(state, action)
+ mcts = MCTS(env, evaluator, [0,0], 2, np.array([0.5,0.5]), max_step=1e4)
diff --git a/tianshou/core/policy_value.json b/tianshou/core/policy_value.json
new file mode 100644
index 0000000..e69de29
diff --git a/tianshou/core/state_mask.json b/tianshou/core/state_mask.json
new file mode 100644
index 0000000..1d934fe
--- /dev/null
+++ b/tianshou/core/state_mask.json
@@ -0,0 +1,4 @@
+{
+ "state" : "10",
+ "mask" : "1000"
+}