/*
This file is part of Leela Zero.
Copyright (C) 2017 Gian-Carlo Pascutto
Leela Zero is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Leela Zero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Leela Zero. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include <algorithm>
#include <cassert>
#include <iostream>
#include <fstream>
#include <iterator>
#include <string>
#include <memory>
#include <cmath>
#include <array>
#include <thread>
#include <boost/utility.hpp>
#include <boost/format.hpp>
#include <cstdio> // popen() / pclose() (POSIX)
#include <cstring>
#include <sstream>
#include <stdexcept>
#include "Im2Col.h"
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#endif
#ifdef USE_MKL
#include <mkl.h>
#endif
#ifdef USE_OPENBLAS
#include <cblas.h>
#endif
#ifdef USE_OPENCL
#include "OpenCL.h"
#include "UCTNode.h"
#endif
#include "SGFTree.h"
#include "SGFParser.h"
#include "Utils.h"
#include "FastBoard.h"
#include "Random.h"
#include "Network.h"
#include "GTP.h"
#include "Utils.h"
using namespace Utils;
// Input + residual block tower
std::vector<std::vector<float>> conv_weights;
std::vector<std::vector<float>> conv_biases;
std::vector<std::vector<float>> batchnorm_means;
std::vector<std::vector<float>> batchnorm_variances;
// Policy head
std::vector<float> conv_pol_w;
std::vector<float> conv_pol_b;
std::array<float, 2> bn_pol_w1;
std::array<float, 2> bn_pol_w2;
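// 261364 = (2 x 361) policy-plane inputs x 362 outputs (361 board points + pass)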
std::array<float, 261364> ip_pol_w;
std::array<float, 362> ip_pol_b;
// Value head
std::vector<float> conv_val_w;
std::vector<float> conv_val_b;
std::array<float, 1> bn_val_w1;
std::array<float, 1> bn_val_w2;
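// 92416 = 361 value-plane inputs x 256 hidden units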
std::array<float, 92416> ip1_val_w;
std::array<float, 256> ip1_val_b;
std::array<float, 256> ip2_val_w;
std::array<float, 1> ip2_val_b;
void Network::benchmark(GameState *state) {
int BENCH_AMOUNT = 1600;
int cpus = cfg_num_threads;
int iters_per_thread = (BENCH_AMOUNT + (cpus - 1)) / cpus;
Time start;
ThreadGroup tg(thread_pool);
for (int i = 0; i < cpus; i++) {
tg.add_task([iters_per_thread, state]() {
GameState mystate = *state;
for (int loop = 0; loop < iters_per_thread; loop++) {
auto vec = get_scored_moves(&mystate, Ensemble::RANDOM_ROTATION);
}
});
}
tg.wait_all();
Time end;
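// Time::timediff() returns centiseconds, hence the division by 100.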
myprintf("%5d evaluations in %5.2f seconds -> %d n/s\n",
BENCH_AMOUNT,
(float) Time::timediff(start, end) / 100.0,
(int) ((float) BENCH_AMOUNT / ((float) Time::timediff(start, end) / 100.0)));
}
void Network::initialize(void) {
#ifdef USE_OPENCL
myprintf("Initializing OpenCL\n");
opencl.initialize();
// Count size of the network
myprintf("Detecting residual layers...");
std::ifstream wtfile(cfg_weightsfile);
if (wtfile.fail()) {
myprintf("Could not open weights file: %s\n", cfg_weightsfile.c_str());
exit(EXIT_FAILURE);
}
std::string line;
auto linecount = size_t{0};
while (std::getline(wtfile, line)) {
// The second line of parameters holds the first convolution layer's
// biases, so its length tells us the number of channels in the
// residual layers (assuming they are all equally wide, which the
// format does not strictly require).
if (linecount == 1) {
std::stringstream ss(line);
auto count = std::distance(std::istream_iterator<std::string>(ss),
std::istream_iterator<std::string>());
myprintf("%d channels...", count);
}
linecount++;
}
// 1 input layer (4 x weights), 14 ending weights, the rest are residuals
// every residual has 8 x weight lines
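// e.g. a 6-block network has 4 + 6 * 8 + 14 = 66 weight lines in total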
auto residual_layers = linecount - (4 + 14);
if (residual_layers % 8 != 0) {
myprintf("\nInconsistent number of weights in the file.\n");
exit(EXIT_FAILURE);
}
residual_layers /= 8;
myprintf("%d layers\nTransferring weights to GPU...", residual_layers);
// Re-read file and process
wtfile.clear();
wtfile.seekg(0, std::ios::beg);
auto plain_conv_layers = 1 + (residual_layers * 2);
auto plain_conv_wts = plain_conv_layers * 4;
linecount = 0;
while (std::getline(wtfile, line)) {
std::vector<float> weights;
float weight;
std::istringstream iss(line);
while (iss >> weight) {
weights.emplace_back(weight);
}
if (linecount < plain_conv_wts) {
if (linecount % 4 == 0) {
conv_weights.emplace_back(weights);
} else if (linecount % 4 == 1) {
conv_biases.emplace_back(weights);
} else if (linecount % 4 == 2) {
batchnorm_means.emplace_back(weights);
} else if (linecount % 4 == 3) {
batchnorm_variances.emplace_back(weights);
}
} else if (linecount == plain_conv_wts) {
conv_pol_w = std::move(weights);
} else if (linecount == plain_conv_wts + 1) {
conv_pol_b = std::move(weights);
} else if (linecount == plain_conv_wts + 2) {
std::copy(begin(weights), end(weights), begin(bn_pol_w1));
} else if (linecount == plain_conv_wts + 3) {
std::copy(begin(weights), end(weights), begin(bn_pol_w2));
} else if (linecount == plain_conv_wts + 4) {
std::copy(begin(weights), end(weights), begin(ip_pol_w));
} else if (linecount == plain_conv_wts + 5) {
std::copy(begin(weights), end(weights), begin(ip_pol_b));
} else if (linecount == plain_conv_wts + 6) {
conv_val_w = std::move(weights);
} else if (linecount == plain_conv_wts + 7) {
conv_val_b = std::move(weights);
} else if (linecount == plain_conv_wts + 8) {
std::copy(begin(weights), end(weights), begin(bn_val_w1));
} else if (linecount == plain_conv_wts + 9) {
std::copy(begin(weights), end(weights), begin(bn_val_w2));
} else if (linecount == plain_conv_wts + 10) {
std::copy(begin(weights), end(weights), begin(ip1_val_w));
} else if (linecount == plain_conv_wts + 11) {
std::copy(begin(weights), end(weights), begin(ip1_val_b));
} else if (linecount == plain_conv_wts + 12) {
std::copy(begin(weights), end(weights), begin(ip2_val_w));
} else if (linecount == plain_conv_wts + 13) {
std::copy(begin(weights), end(weights), begin(ip2_val_b));
}
linecount++;
}
wtfile.close();
// input
size_t weight_index = 0;
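// Input convolution: 3x3 filters, followed by batchnorm over the
// 19 x 19 = 361 board points.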
opencl_net.push_convolve(3, conv_weights[weight_index],
conv_biases[weight_index]);
opencl_net.push_batchnorm(361, batchnorm_means[weight_index],
batchnorm_variances[weight_index]);
weight_index++;
// residual blocks
for (auto i = size_t{0}; i < residual_layers; i++) {
opencl_net.push_residual(3, conv_weights[weight_index],
conv_biases[weight_index],
batchnorm_means[weight_index],
batchnorm_variances[weight_index],
conv_weights[weight_index + 1],
conv_biases[weight_index + 1],
batchnorm_means[weight_index + 1],
batchnorm_variances[weight_index + 1]);
weight_index += 2;
}
myprintf("done\n");
#endif
#ifdef USE_BLAS
#ifndef __APPLE__
#ifdef USE_OPENBLAS
openblas_set_num_threads(1);
myprintf("BLAS Core: %s\n", openblas_get_corename());
#endif
#ifdef USE_MKL
//mkl_set_threading_layer(MKL_THREADING_SEQUENTIAL);
mkl_set_num_threads(1);
MKLVersion Version;
mkl_get_version(&Version);
myprintf("BLAS core: MKL %s\n", Version.Processor);
#endif
#endif
#endif
}
#ifdef USE_BLAS
template<unsigned int filter_size,
unsigned int outputs>
void convolve(const std::vector<float> &input,
const std::vector<float> &weights,
const std::vector<float> &biases,
std::vector<float> &output) {
// fixed for 19x19
constexpr unsigned int width = 19;
constexpr unsigned int height = 19;
constexpr unsigned int spatial_out = width * height;
constexpr unsigned int filter_len = filter_size * filter_size;
auto channels = int(weights.size() / (biases.size() * filter_len));
unsigned int filter_dim = filter_len * channels;
std::vector<float> col(filter_dim * width * height);
im2col<filter_size>(channels, input, col);
// Weight shape: (outputs, input channels, filter_size, filter_size),
// e.g. 96 22 5 5.
// output[outputs, 19x19] = weights[outputs, channels * filter_len]
//                          x col[channels * filter_len, 19x19]
// C = alpha * A * B + beta * C
// M: the number of rows in matrices A and C.
// N: the number of columns in matrices B and C.
// K: the number of columns in matrix A; the number of rows in matrix B.
// lda: the leading dimension of A; for a row-major matrix A[m][n]
// this is n, the number of columns.
// cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
// ldb, beta, C, ldc);
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
// M N K
outputs, spatial_out, filter_dim,
1.0f, &weights[0], filter_dim,
&col[0], spatial_out,
0.0f, &output[0], spatial_out);
for (unsigned int o = 0; o < outputs; o++) {
for (unsigned int b = 0; b < spatial_out; b++) {
output[(o * spatial_out) + b] =
biases[o] + output[(o * spatial_out) + b];
}
}
}
template<unsigned int inputs,
unsigned int outputs,
size_t W, size_t B>
void innerproduct(const std::vector<float> &input,
const std::array<float, W> &weights,
const std::array<float, B> &biases,
std::vector<float> &output) {
static_assert(B == outputs, "bias array size must match the number of outputs");
cblas_sgemv(CblasRowMajor, CblasNoTrans,
// M K
outputs, inputs,
1.0f, &weights[0], inputs,
&input[0], 1,
0.0f, &output[0], 1);
auto lambda_ReLU = [](float val) {
return (val > 0.0f) ?
val : 0.0f;
};
for (unsigned int o = 0; o < outputs; o++) {
float val = biases[o] + output[o];
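// Only the value head's 256-unit hidden layer is followed by a ReLU;
// the final single-output layer feeds into tanh (see the winrate
// computation in get_scored_moves_internal).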
if (outputs == 256) {
val = lambda_ReLU(val);
}
output[o] = val;
}
}
template<unsigned int channels,
unsigned int spatial_size>
void batchnorm(const std::vector<float> &input,
const std::array<float, channels> &means,
const std::array<float, channels> &variances,
std::vector<float> &output) {
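// Note: only per-channel means and variances are stored; any learned
// batchnorm scale/shift is assumed to be folded into the adjacent
// convolution weights by the producer of the weights file.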
constexpr float epsilon = 1e-5f;
auto lambda_ReLU = [](float val) {
return (val > 0.0f) ?
val : 0.0f;
};
for (unsigned int c = 0; c < channels; ++c) {
float mean = means[c];
float variance = variances[c] + epsilon;
float scale_stddiv = 1.0f / std::sqrt(variance);
float *out = &output[c * spatial_size];
float const *in = &input[c * spatial_size];
for (unsigned int b = 0; b < spatial_size; b++) {
out[b] = lambda_ReLU(scale_stddiv * (in[b] - mean));
}
}
}
#endif
void Network::softmax(const std::vector<float> &input,
std::vector<float> &output,
float temperature) {
assert(&input != &output);
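// Subtract the (temperature-scaled) maximum before exponentiating so
// exp() cannot overflow; this leaves the softmax result unchanged.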
float alpha = *std::max_element(input.begin(),
input.begin() + output.size());
alpha /= temperature;
float denom = 0.0f;
std::vector<float> helper(output.size());
for (size_t i = 0; i < output.size(); i++) {
float val = std::exp((input[i] / temperature) - alpha);
helper[i] = val;
denom += val;
}
for (size_t i = 0; i < output.size(); i++) {
output[i] = helper[i] / denom;
}
}
/* Magic code that executes only once; what is the mechanism? (See the note after this block.)
void function() {
static const auto runOnce = [] (auto content) { std::cout << content << std::endl; return true;};
}
*/
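/*
Note (explanation, not project code): a function-local static is
initialized exactly once, the first time control reaches its declaration,
and since C++11 that initialization is thread-safe. To actually run code
once, the lambda must be *invoked* during that initialization, e.g.:

static const bool runOnce = [] {
std::cout << "printed exactly once" << std::endl;
return true;
}();

As written above, the lambda is only defined, never called.
*/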
// Run a shell command and capture its standard output.
// Note: popen()/pclose() are POSIX functions, so this helper is not
// portable to platforms without them.
std::string exec(const char* cmd) {
std::array<char, 128> buffer;
std::string result;
std::shared_ptr<FILE> pipe(popen(cmd, "r"), pclose);
if (!pipe) throw std::runtime_error("popen() failed!");
// fgets() returns nullptr on EOF or error, so no separate feof() check
// is needed.
while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
result += buffer.data();
}
return result;
}
Network::Netresult Network::get_scored_moves(
GameState *state, Ensemble ensemble, int rotation) {
Netresult result;
if (state->board.get_boardsize() != 19) {
return result;
}
NNPlanes planes;
gather_features(state, planes);
/*
* tianshou_code
* Write the board information (input planes) to a local file.
*/
// Disabled debug output of the plane count and contents:
//static std::once_flag plane_size_flag;
//std::call_once ( plane_size_flag, [&]{ printf("Network::get_scored_moves planes size : %d\n", planes.size());} );
/*
for (size_t k = 0; k < planes.size(); ++k) {
for (int i = 0; i < board_size; ++i) {
for (int j = 0; j < board_size; ++j) {
std::cout << planes[k][i * board_size + j] << " ";
}
printf("\n");
}
printf("======================================\n");
}
*/
static int call_number = 0;
call_number++;
std::ofstream mctsnn_file;
mctsnn_file.open("/home/yama/rl/tianshou/leela-zero/src/mcts_nn_files/board_" + std::to_string(call_number));
const int total_count = board_size * board_size;
for (size_t k = 0; k < planes.size(); ++k) {
for (int i = 0; i < total_count; ++i) {
mctsnn_file << planes[k][i];
}
mctsnn_file << '\n';
}
mctsnn_file.close();
const int extended_board_size = board_size + 2;
Network::Netresult nn_res;
std::string cmd = "python /home/yama/rl/tianshou/AlphaGo/Network.py " + std::to_string(call_number);
std::string res = exec(cmd.c_str());
//std::cout << res << std::endl;
std::string buf; // Have a buffer string
std::stringstream ss(res); // Insert the string into a stream
const int policy_size = board_size * board_size + 1;
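// The Python script prints 361 policy values for the inner board points,
// then the pass probability, then the state value; idx tracks the vertex
// index on the padded (board_size + 2) x (board_size + 2) board.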
int idx = 0;
for (int i = 0; i < extended_board_size; ++i) {
for (int j = 0; j < extended_board_size; ++j) {
if ((0 < i) && (i < board_size + 1) && (0 < j) && (j < board_size + 1)) {
ss >> buf;
nn_res.first.emplace_back(std::make_pair(std::stod(buf), idx));
//std::cout << std::fixed << "[" << std::stod(buf) << "," << idx << "]\n";
}
idx++;
}
}
// probability of pass
ss >> buf;
nn_res.first.emplace_back(std::make_pair(std::stod(buf), -1));
std::cout << "tianshou nn output : \t\n";
auto max_iterator = std::max_element(nn_res.first.begin(), nn_res.first.end());
int argmax = std::distance(nn_res.first.begin(), max_iterator);
int line = (argmax / board_size) + 1, column = (argmax % board_size) + 1;
std::cout << "\tmove : " << argmax << " [" << line << "," << column << "]" << std::endl;
// evaluation of state value
ss >> buf;
nn_res.second = std::stod(buf);
std::cout << "\tvalue : " << nn_res.second << std::endl;
if (ensemble == DIRECT) {
assert(rotation >= 0 && rotation <= 7);
result = get_scored_moves_internal(state, planes, rotation);
} else {
assert(ensemble == RANDOM_ROTATION);
assert(rotation == -1);
int rand_rot = Random::get_Rng()->randfix<8>();
std::cout << "rotation : " << rand_rot << std::endl;
result = get_scored_moves_internal(state, planes, rand_rot);
}
/*
static std::once_flag of;
std::call_once(of, [&] { } );
for (auto ele: result.first) {
std::cout << std::fixed << "[" << ele.first << "," << ele.second << "]\n";
}
*/
std::cout << "leela nn output : \t\n";
max_iterator = std::max_element(result.first.begin(), result.first.end());
argmax = std::distance(result.first.begin(), max_iterator);
line = (argmax / board_size) + 1, column = (argmax % board_size) + 1;
std::cout << "\tmove : " << argmax << " [" << line << "," << column << "]" << std::endl;
std::cout << "\tvalue : " << result.second << std::endl;
// Return the tianshou network's output; the leela network's result is
// computed above only for comparison.
return nn_res;
//return result;
}
Network::Netresult Network::get_scored_moves_internal(
GameState *state, NNPlanes &planes, int rotation) {
assert(rotation >= 0 && rotation <= 7);
constexpr int channels = INPUT_CHANNELS;
assert(channels == planes.size());
constexpr int width = 19;
constexpr int height = 19;
constexpr int max_channels = MAX_CHANNELS;
std::vector<float> input_data(max_channels * width * height);
std::vector<float> output_data(max_channels * width * height);
std::vector<float> policy_data_1(2 * width * height);
std::vector<float> policy_data_2(2 * width * height);
std::vector<float> value_data_1(1 * width * height);
std::vector<float> value_data_2(1 * width * height);
std::vector<float> policy_out((width * height) + 1);
std::vector<float> softmax_data((width * height) + 1);
std::vector<float> winrate_data(256);
std::vector<float> winrate_out(1);
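// Copy the planes into the input buffer, remapping each point through
// the chosen board symmetry; the policy output is mapped back through
// rev_rotate_nn_idx() below.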
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
int vtx = rotate_nn_idx(h * 19 + w, rotation);
input_data[(c * height + h) * width + w] =
(float) planes[c][vtx];
}
}
}
#ifdef USE_OPENCL
opencl_net.forward(input_data, output_data);
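// The residual tower ran on the GPU; the small policy and value heads
// below run on the CPU via BLAS.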
// Get the moves
convolve<1, 2>(output_data, conv_pol_w, conv_pol_b, policy_data_1);
batchnorm<2, 361>(policy_data_1, bn_pol_w1, bn_pol_w2, policy_data_2);
innerproduct<2 * 361, 362>(policy_data_2, ip_pol_w, ip_pol_b, policy_out);
softmax(policy_out, softmax_data, cfg_softmax_temp);
std::vector<float> &outputs = softmax_data;
// Now get the score
convolve<1, 1>(output_data, conv_val_w, conv_val_b, value_data_1);
batchnorm<1, 361>(value_data_1, bn_val_w1, bn_val_w2, value_data_2);
innerproduct<361, 256>(value_data_2, ip1_val_w, ip1_val_b, winrate_data);
innerproduct<256, 1>(winrate_data, ip2_val_w, ip2_val_b, winrate_out);
// Map the tanh output from [-1, 1] to a [0, 1] winrate
// (equivalent to a sigmoid with doubled input).
float winrate_sig = (1.0f + std::tanh(winrate_out[0])) / 2.0f;
#elif defined(USE_BLAS) && !defined(USE_OPENCL)
#error "Not implemented"
// Not implemented yet - not very useful unless you have some
// sort of Xeon Phi
softmax(output_data, softmax_data, cfg_softmax_temp);
// Move scores
std::vector<float>& outputs = softmax_data;
#endif
std::vector<scored_node> result;
for (size_t idx = 0; idx < outputs.size(); idx++) {
if (idx < 19 * 19) {
auto rot_idx = rev_rotate_nn_idx(idx, rotation);
auto val = outputs[rot_idx];
int x = idx % 19;
int y = idx / 19;
int vtx = state->board.get_vertex(x, y);
if (state->board.get_square(vtx) == FastBoard::EMPTY) {
result.emplace_back(val, vtx);
}
} else {
result.emplace_back(outputs[idx], FastBoard::PASS);
}
}
return std::make_pair(result, winrate_sig);
}
void Network::show_heatmap(FastState *state, Netresult &result, bool topmoves) {
auto moves = result.first;
std::vector<std::string> display_map;
std::string line;
for (unsigned int y = 0; y < 19; y++) {
for (unsigned int x = 0; x < 19; x++) {
int vtx = state->board.get_vertex(x, y);
auto item = std::find_if(moves.cbegin(), moves.cend(),
[&vtx](scored_node const &item) {
return item.second == vtx;
});
float score = 0.0f;
// Non-empty squares won't be scored
if (item != moves.cend()) {
score = item->first;
assert(vtx == item->second);
}
line += boost::str(boost::format("%3d ") % int(score * 1000));
if (x == 18) {
display_map.push_back(line);
line.clear();
}
}
}
for (int i = display_map.size() - 1; i >= 0; --i) {
myprintf("%s\n", display_map[i].c_str());
}
assert(result.first.back().second == FastBoard::PASS);
int pass_score = int(result.first.back().first * 1000);
myprintf("pass: %d\n", pass_score);
myprintf("winrate: %f\n", result.second);
if (topmoves) {
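// Reverse iterators with the default ascending compare sort the moves
// in descending order of score.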
std::stable_sort(moves.rbegin(), moves.rend());
float cum = 0.0f;
size_t tried = 0;
while (cum < 0.85f && tried < moves.size()) {
if (moves[tried].first < 0.01f) break;
myprintf("%1.3f (%s)\n",
moves[tried].first,
state->board.move_to_text(moves[tried].second).c_str());
cum += moves[tried].first;
tried++;
}
}
}
void Network::gather_features(GameState *state, NNPlanes &planes) {
planes.resize(18);
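// Plane layout: 0-7 our stones over the last 8 positions,
// 8-15 the opponent's stones, 16/17 one-hot side-to-move.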
const size_t our_offset = 0;
const size_t their_offset = 8;
BoardPlane &black_to_move = planes[16];
BoardPlane &white_to_move = planes[17];
bool whites_move = state->get_to_move() == FastBoard::WHITE;
// tianshou_code
//std::cout << "whites_move : " << whites_move << std::endl;
if (whites_move) {
white_to_move.set();
} else {
black_to_move.set();
}
// Go back in time, fill history boards
size_t backtracks = 0;
for (int h = 0; h < 8; h++) {
int tomove = state->get_to_move();
// collect white, black occupation planes
for (int j = 0; j < 19; j++) {
for (int i = 0; i < 19; i++) {
int vtx = state->board.get_vertex(i, j);
FastBoard::square_t color =
state->board.get_square(vtx);
int idx = j * 19 + i;
if (color != FastBoard::EMPTY) {
if (color == tomove) {
planes[our_offset + h][idx] = true;
} else {
planes[their_offset + h][idx] = true;
}
}
}
}
if (!state->undo_move()) {
break;
} else {
backtracks++;
}
}
// Now go back to present day
for (size_t h = 0; h < backtracks; h++) {
state->forward_move();
}
}
int Network::rev_rotate_nn_idx(const int vertex, int symmetry) {
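// Each symmetry's inverse: 5 and 6 (transpose plus a single mirror)
// invert each other; the remaining symmetries are their own inverse.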
static const int invert[] = {0, 1, 2, 3, 4, 6, 5, 7};
assert(rotate_nn_idx(rotate_nn_idx(vertex, symmetry), invert[symmetry])
== vertex);
return rotate_nn_idx(vertex, invert[symmetry]);
}
int Network::rotate_nn_idx(const int vertex, int symmetry) {
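// Apply one of the board's 8 dihedral symmetries: symmetry >= 4 first
// transposes (swaps x and y), then bit 0 mirrors the y axis and bit 1
// mirrors the x axis.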
assert(vertex >= 0 && vertex < 19 * 19);
assert(symmetry >= 0 && symmetry < 8);
int x = vertex % 19;
int y = vertex / 19;
int newx;
int newy;
if (symmetry >= 4) {
std::swap(x, y);
symmetry -= 4;
}
if (symmetry == 0) {
newx = x;
newy = y;
} else if (symmetry == 1) {
newx = x;
newy = 19 - y - 1;
} else if (symmetry == 2) {
newx = 19 - x - 1;
newy = y;
} else {
assert(symmetry == 3);
newx = 19 - x - 1;
newy = 19 - y - 1;
}
int newvtx = (newy * 19) + newx;
assert(newvtx >= 0 && newvtx < 19 * 19);
return newvtx;
}