Tianshou/AlphaGo/reversi.py

import numpy as np

'''
Board representation and rules of the Reversi game.
(1, 1) is the upper-left corner of the board and
(size, 1) is the lower-left corner.
'''
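# Note on the action encoding (illustrative): a 1-based vertex (x, y) flattens to
# the action (x - 1) * size + (y - 1) with size = 8, and the pass vertex (0, 0)
# maps to action 64.  For example, vertex (3, 5) corresponds to 2 * 8 + 4 = 20.
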
class Reversi:
    def __init__(self, black=None, white=None):
        self.board = None   # 8 * 8 board with 1 for black, -1 for white and 0 for blank
        self.color = None   # 1 for black and -1 for white
        self.action = None  # number in 0~63 for a board move, 64 for a pass
        self.winner = None
        self.black_win = None
        self.size = 8
    def _deflatten(self, idx):
        # Convert a flattened action index into a 1-based (x, y) vertex.
        x = idx // self.size + 1
        y = idx % self.size + 1
        return (x, y)
    def _flatten(self, vertex):
        # Convert a 1-based (x, y) vertex into a flattened action index.
        # The special vertex (0, 0) denotes a pass and maps to 64.
        x, y = vertex
        if (x == 0) and (y == 0):
            return 64
        return (x - 1) * self.size + (y - 1)
    def get_board(self, board=None):
        # Initialise (or reuse) the board and place the four starting stones.
        # Note: "board or np.zeros(...)" would raise on a numpy array, so test for None.
        self.board = board if board is not None else np.zeros([self.size, self.size])
        self.board[3, 3] = -1
        self.board[4, 4] = -1
        self.board[3, 4] = 1
        self.board[4, 3] = 1
        return self.board
    def _find_correct_moves(self, is_next=False):
        # Collect every flattened action that is legal for the current player,
        # or for the opponent when is_next is True.
        moves = []
        if is_next:
            color = 0 - self.color
        else:
            color = self.color
        for i in range(64):
            x, y = self._deflatten(i)
            valid = self._is_valid(x - 1, y - 1, color)
            if valid:
                moves.append(i)
        return moves
    def _one_direction_valid(self, x, y, color):
        # Check that (x, y) is on the board and holds a stone of the given color.
        if (0 <= x < self.size) and (0 <= y < self.size):
            if self.board[x, y] == color:
                return True
        return False
    def _is_valid(self, x, y, color):
        # A move on the empty square (x, y) (0-based) is valid if, in some direction,
        # a contiguous run of opponent stones is followed by a stone of our own color.
        if self.board[x, y]:
            return False
        for x_direction in [-1, 0, 1]:
            for y_direction in [-1, 0, 1]:
                new_x = x
                new_y = y
                flag = 0
                while True:
                    new_x += x_direction
                    new_y += y_direction
                    if self._one_direction_valid(new_x, new_y, 0 - color):
                        flag = 1
                    else:
                        break
                if self._one_direction_valid(new_x, new_y, color) and flag:
                    return True
        return False
    def simulate_get_mask(self, state, action_set):
        # Given a state (history of boards, color to play) and the full action set,
        # return the actions that are NOT legal in the current position.
        history_boards, color = state
        self.board = np.reshape(history_boards[-1], (self.size, self.size))
        self.color = color
        valid_moves = self._find_correct_moves()
        # print(valid_moves)  # debug output
        if not len(valid_moves):
            # No legal board move: mask everything except the final action (the pass).
            invalid_action_mask = action_set[0:-1]
        else:
            invalid_action_mask = []
            for action in action_set:
                if action not in valid_moves:
                    invalid_action_mask.append(action)
        return invalid_action_mask
    def simulate_step_forward(self, state, action):
        # Apply an action to a simulated state [board, color]; return the new state
        # and the reward from the current player's point of view.
        self.board = state[0].copy()
        self.board = np.reshape(self.board, (self.size, self.size))
        self.color = state[1]
        self.action = action
        if self.action == 64:
            # Pass move: if the opponent has no legal move either, the game is over.
            valid_moves = self._find_correct_moves(is_next=True)
            if not len(valid_moves):
                self._game_over()
                return None, self.winner * self.color
            else:
                return [self.board, 0 - self.color], 0
        self._step()
        return [self.board, 0 - self.color], 0
    def _game_over(self):
        black_num, white_num = self._number_of_black_and_white()
        self.black_win = black_num - white_num
        if self.black_win > 0:
            self.winner = 1
        elif self.black_win < 0:
            self.winner = -1
        else:
            self.winner = 0
    def _number_of_black_and_white(self):
        black_num = 0
        white_num = 0
        board_list = np.reshape(self.board, self.size ** 2)
        for i in range(len(board_list)):
            if board_list[i] == 1:
                black_num += 1
            elif board_list[i] == -1:
                white_num += 1
        return black_num, white_num
    def _step(self):
        # Validate the pending action and flip the captured stones.
        # The None check must come first, otherwise the comparison raises a TypeError.
        if self.action is None:
            raise ValueError("Action is None!")
        if self.action < 0 or self.action > 63:
            raise ValueError("Action not in the range of [0, 63]!")
        x, y = self._deflatten(self.action)
        valid = self._flip(x - 1, y - 1)
        if not valid:
            raise ValueError("Illegal action!")
    def _flip(self, x, y):
        # Place a stone of self.color at (x, y) (0-based) and flip every enclosed run
        # of opponent stones; return True if at least one direction produced a capture,
        # i.e. the move was legal.
        valid = 0
        self.board[x, y] = self.color
        for x_direction in [-1, 0, 1]:
            for y_direction in [-1, 0, 1]:
                new_x = x
                new_y = y
                flag = 0
                while True:
                    new_x += x_direction
                    new_y += y_direction
                    if self._one_direction_valid(new_x, new_y, 0 - self.color):
                        flag = 1
                    else:
                        break
                if self._one_direction_valid(new_x, new_y, self.color) and flag:
                    valid = 1
                    flip_x = x
                    flip_y = y
                    while True:
                        flip_x += x_direction
                        flip_y += y_direction
                        if self._one_direction_valid(flip_x, flip_y, 0 - self.color):
                            self.board[flip_x, flip_y] = self.color
                        else:
                            break
        if valid:
            return True
        else:
            return False
    def executor_do_move(self, history, latest_boards, board, color, vertex):
        # Play a move given as an (x, y) vertex; return False only when the move is a
        # pass and the opponent has no legal reply, i.e. the game is over.
        self.board = np.reshape(board, (self.size, self.size))
        self.color = color
        self.action = self._flatten(vertex)
        if self.action == 64:
            valid_moves = self._find_correct_moves(is_next=True)
            if not len(valid_moves):
                return False
            else:
                return True
        else:
            self._step()
            return True
    def executor_get_score(self, board):
        # Return the final score as (number of black stones - number of white stones).
        self.board = board
        self._game_over()
        if self.black_win is not None:
            return self.black_win
        else:
            raise ValueError("Game not finished!")
if __name__ == "__main__":
    reversi = Reversi()
    # board = reversi.get_board()
    # print(board)
    # state, value = reversi.simulate_step_forward([board, -1], 20)
    # print(state[0])
    # print("board")
    # print(board)
    # r = reversi.executor_get_score(board)
    # print(r)
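
    # A minimal usage sketch (illustrative, mirroring the commented examples above):
    # white (-1) replies on the standard opening position with the flattened
    # action 20, i.e. vertex (3, 5).
    board = reversi.get_board()
    state, value = reversi.simulate_step_forward([board, -1], 20)
    print(state[0])   # board after white's move
    print(state[1])   # 1: it is black's turn next
    print(value)      # 0: the game is not over yet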