From f09ebc212432a1033cdcfefe1a59fa2d6aaa112f Mon Sep 17 00:00:00 2001
From: Tongzheng Ren
Date: Sun, 5 Nov 2017 16:37:26 +0800
Subject: [PATCH 1/2] minor modification

---
 AlphaGo/Network.py    |   7 +++++--
 AlphaGo/Network.pyc   | Bin 0 -> 4688 bytes
 AlphaGo/multi_gpu.pyc | Bin 0 -> 2425 bytes
 3 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 AlphaGo/Network.pyc
 create mode 100644 AlphaGo/multi_gpu.pyc

diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py
index f594be2..d350247 100644
--- a/AlphaGo/Network.py
+++ b/AlphaGo/Network.py
@@ -50,7 +50,7 @@ p = policy_heads(h, is_training)
 loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
 reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
 total_loss = loss + reg
-train_op = tf.train.RMSPropOptimizer(1e-2).minimize(total_loss)
+train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
 
 var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
 saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
@@ -104,4 +104,7 @@ def forward(board):
             saver.restore(sess, ckpt_file)
         else:
             raise ValueError("No model loaded")
-        return sess.run([p,v], feed_dict={x:board})
\ No newline at end of file
+        return sess.run([p,v], feed_dict={x:board})
+
+if __name__ == "__main__":
+    train()
\ No newline at end of file
diff --git a/AlphaGo/Network.pyc b/AlphaGo/Network.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8691e5341c2c52cefff213142ebd5644ab3eb4c3
GIT binary patch
literal 4688
[base85-encoded .pyc payload omitted]

literal 0
HcmV?d00001

diff --git a/AlphaGo/multi_gpu.pyc b/AlphaGo/multi_gpu.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d269ca56119288768409d95fa3ced90194c4bf7d
GIT binary patch
literal 2425
[base85-encoded .pyc payload omitted]

literal 0
HcmV?d00001
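PATCH 1/2 makes two functional changes to AlphaGo/Network.py: it lowers the hard-coded RMSProp learning rate from 1e-2 to 1e-4 and adds a __main__ entry point (it also commits two compiled .pyc files by accident, which the follow-up patch tries to clean up). If the learning rate is expected to keep changing, one alternative to editing the graph each time is to feed it at run time. The sketch below assumes the patched Network.py graph (x, z, pi, is_training, total_loss); the placeholder named lr is hypothetical and not part of the repository:

    import tensorflow as tf

    # Hypothetical: a fed learning rate instead of a hard-coded constant.
    lr = tf.placeholder(tf.float32, shape=[], name="learning_rate")
    train_op = tf.train.RMSPropOptimizer(lr).minimize(total_loss)

    # Each training step then passes the current rate along with the batch, e.g.:
    #   sess.run(train_op, feed_dict={x: batch_x, z: batch_z, pi: batch_pi,
    #                                 is_training: True, lr: 1e-4})
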
From 025dcb97783a9e191e0b1b4dfd1b1bcd0d31ea6e Mon Sep 17 00:00:00 2001
From: Tongzheng Ren
Date: Sun, 5 Nov 2017 16:38:20 +0800
Subject: [PATCH 2/2] delete .pyc

---
 .gitignore           |   1 +
 AlphaGo/Network.py   | 110 ------------------------------------------
 AlphaGo/multi_gpu.py |  75 -----------------------------
 3 files changed, 1 insertion(+), 185 deletions(-)
 delete mode 100644 AlphaGo/Network.py
 delete mode 100644 AlphaGo/multi_gpu.py

diff --git a/.gitignore b/.gitignore
index b9ae745..85c32a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .idea
 leela-zero
+*.pyc
diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py
deleted file mode 100644
index d350247..0000000
--- a/AlphaGo/Network.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import tensorflow as tf
-import numpy as np
-import time
-import multi_gpu
-import tensorflow.contrib.layers as layers
-
-def residual_block(input, is_training):
-    normalizer_params = {'is_training': is_training,
-                         'updates_collections': None}
-    h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
-                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                      weights_regularizer=layers.l2_regularizer(1e-4))
-    residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
-                             normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                             weights_regularizer=layers.l2_regularizer(1e-4))
-    h = h + residual
-    return tf.nn.relu(h)
-
-def policy_heads(input, is_training):
-    normalizer_params = {'is_training': is_training,
-                         'updates_collections': None}
-    h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
-                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                      weights_regularizer=layers.l2_regularizer(1e-4))
-    h = layers.flatten(h)
-    h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
-    return h
-
-def value_heads(input, is_training):
-    normalizer_params = {'is_training': is_training,
-                         'updates_collections': None}
-    h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
-                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                      weights_regularizer=layers.l2_regularizer(1e-4))
-    h = layers.flatten(h)
-    h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4))
-    h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
-    return h
-
-x = tf.placeholder(tf.float32,shape=[None,19,19,17])
-is_training = tf.placeholder(tf.bool, shape=[])
-z = tf.placeholder(tf.float32, shape=[None, 1])
-pi = tf.placeholder(tf.float32, shape=[None, 362])
-
-h = residual_block(x, is_training)
-for i in range(18):
-    h = residual_block(h, is_training)
-v = value_heads(h, is_training)
-p = policy_heads(h, is_training)
-loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
-reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-total_loss = loss + reg
-train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
-
-var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
-saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
-def train():
-    data = np.load("data.npz")
-    boards = data["boards"]
-    wins = data["wins"]
-    ps = data["ps"]
-    print (boards.shape)
-    print (wins.shape)
-    print (ps.shape)
-    epochs = 100
-    batch_size = 32
-    batch_num = boards.shape[0] // batch_size
-    result_path = "./results/"
-    with multi_gpu.create_session() as sess:
-        sess.run(tf.global_variables_initializer())
-        ckpt_file = tf.train.latest_checkpoint(result_path)
-        if ckpt_file is not None:
-            print('Restoring model from {}...'.format(ckpt_file))
-            saver.restore(sess, ckpt_file)
-        for epoch in range(epochs):
-            time_train = -time.time()
-            index = np.arange(boards.shape[0])
-            np.random.shuffle(index)
-            losses = []
-            regs = []
-            for iter in range(batch_num):
-                _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
-                                                                                        z:wins[index[iter*batch_size:(iter+1)*batch_size]],
-                                                                                        pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
-                                                                                        is_training:True})
-                losses.append(l)
-                regs.append(r)
-                if iter % 1 == 0:
-                    print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
-                    time_train=-time.time()
-                    losses = []
-                    regs = []
-                if iter % 20 == 0:
-                    save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)
-                    saver.save(sess, result_path + save_path)
-
-def forward(board):
-    result_path = "./results/"
-    with multi_gpu.create_session() as sess:
-        sess.run(tf.global_variables_initializer())
-        ckpt_file = tf.train.latest_checkpoint(result_path)
-        if ckpt_file is not None:
-            print('Restoring model from {}...'.format(ckpt_file))
-            saver.restore(sess, ckpt_file)
-        else:
-            raise ValueError("No model loaded")
-        return sess.run([p,v], feed_dict={x:board})
-
-if __name__ == "__main__":
-    train()
\ No newline at end of file
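A note on the loss in the Network.py removed above: the policy term takes tf.log(tf.nn.softmax(p, 1)), which can underflow to log(0) = -inf when one logit dominates, and multiplying that by pi then produces inf or NaN gradients. A numerically stable equivalent in the same TF 1.x API (a sketch against the deleted graph's p, v, z, and pi tensors, not something the repository ships) fuses the two operations:

    # tf.nn.log_softmax applies the max-subtraction trick internally,
    # so no probability is ever materialized as an exact zero.
    policy_loss = -tf.reduce_mean(tf.multiply(pi, tf.nn.log_softmax(p, 1)))
    value_loss = tf.reduce_mean(tf.square(z - v))
    loss = value_loss + policy_loss  # same quantity as the original expression
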
diff --git a/AlphaGo/multi_gpu.py b/AlphaGo/multi_gpu.py
deleted file mode 100644
index 55dfaa8..0000000
--- a/AlphaGo/multi_gpu.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
-
-import tensorflow as tf
-from six.moves import zip
-
-
-tf.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use""")
-tf.flags.DEFINE_boolean('log_device_placement', False,
-                        """Whether to log device placement.""")
-FLAGS = tf.flags.FLAGS
-
-
-def create_session():
-    config = tf.ConfigProto(allow_soft_placement=True,
-                            log_device_placement=FLAGS.log_device_placement)
-    return tf.Session(config=config)
-
-
-def average_gradients(tower_grads):
-    """
-    Calculate the average gradient for each shared variable across all towers.
-
-    Note that this function provides a synchronization point across all towers.
-
-    :param tower_grads: List of lists of (gradient, variable) tuples.
-        The outer list is over individual gradients. The inner list is over
-        the gradient calculation for each tower.
-    :return: List of pairs of (gradient, variable) where the gradient has
-        been averaged across all towers.
-    """
-    average_grads = []
-    for grad_and_vars in zip(*tower_grads):
-        # Note that each grad_and_vars looks like the following:
-        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
-        if grad_and_vars[0][0] is None:
-            continue
-        grads = []
-        for g, _ in grad_and_vars:
-            # Add 0 dimension to the gradients to represent the tower.
-            expanded_g = tf.expand_dims(g, 0)
-
-            # Append on a 'tower' dimension which we will average over below.
-            grads.append(expanded_g)
-
-        # Average over the 'tower' dimension.
-        grad = tf.concat(grads, 0)
-        grad = tf.reduce_mean(grad, 0)
-
-        # Keep in mind that the Variables are redundant because they are shared
-        # across towers. So .. we will just return the first tower's pointer to
-        # the Variable.
-        v = grad_and_vars[0][1]
-        grad_and_var = (grad, v)
-        average_grads.append(grad_and_var)
-    return average_grads
-
-
-def average_losses(tower_losses):
-    """
-    Calculate the average loss or other quantity for all towers.
-
-    :param tower_losses: A list of lists of quantities. The outer list is over
-        towers. The inner list is over losses or other quantities for each
-        tower.
-    :return: A list of quantities that have been averaged over all towers.
-    """
-    ret = []
-    for quantities in zip(*tower_losses):
-        ret.append(tf.add_n(quantities) / len(quantities))
-    return ret
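
The deleted multi_gpu.py helpers are self-contained, so for reference here is a minimal sketch of how they would typically be wired together in a TF 1.x tower setup. The per-tower builder build_tower_loss is hypothetical (the repository never defined one); everything else uses only the functions and flags shown in the diff above:

    import tensorflow as tf
    import multi_gpu

    def build_tower_loss(i):
        # Hypothetical: build (or reuse) the model graph on tower i and
        # return its scalar loss; not part of the original repository.
        raise NotImplementedError

    optimizer = tf.train.RMSPropOptimizer(1e-4)
    tower_grads, tower_losses = [], []
    for i in range(multi_gpu.FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            # Reuse the shared variables on every tower after the first.
            with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                loss = build_tower_loss(i)
                tower_losses.append([loss])
                tower_grads.append(optimizer.compute_gradients(loss))

    # average_gradients synchronizes the towers: one update from the mean gradient.
    train_op = optimizer.apply_gradients(multi_gpu.average_gradients(tower_grads))
    mean_loss = multi_gpu.average_losses(tower_losses)[0]

    with multi_gpu.create_session() as sess:
        sess.run(tf.global_variables_initializer())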