debug for go and reversi
This commit is contained in:
parent
426251e158
commit
74504ceb1d
@ -6,6 +6,8 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
from game import Game
|
from game import Game
|
||||||
|
import copy
|
||||||
|
import numpy as np
|
||||||
import utils
|
import utils
|
||||||
|
|
||||||
|
|
||||||
@ -186,7 +188,10 @@ class GTPEngine():
|
|||||||
return self._game.game_engine.executor_get_score(self._game.board), True
|
return self._game.game_engine.executor_get_score(self._game.board), True
|
||||||
|
|
||||||
def cmd_show_board(self, args, **kwargs):
|
def cmd_show_board(self, args, **kwargs):
|
||||||
return self._game.board, True
|
board = copy.deepcopy(self._game.board)
|
||||||
|
if isinstance(board, np.ndarray):
|
||||||
|
board = board.flatten().tolist()
|
||||||
|
return board, True
|
||||||
|
|
||||||
def cmd_get_prob(self, args, **kwargs):
|
def cmd_get_prob(self, args, **kwargs):
|
||||||
return self._game.prob, True
|
return self._game.prob, True
|
||||||
|
@ -26,33 +26,37 @@ class Game:
|
|||||||
TODO : Maybe merge with the engine class in future,
|
TODO : Maybe merge with the engine class in future,
|
||||||
currently leave it untouched for interacting with Go UI.
|
currently leave it untouched for interacting with Go UI.
|
||||||
'''
|
'''
|
||||||
def __init__(self, name="go", role="unknown", debug=False, checkpoint_path=None):
|
def __init__(self, name="reversi", role="unknown", debug=False, checkpoint_path=None):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.role = role
|
self.role = role
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
if self.name == "go":
|
if self.name == "go":
|
||||||
self.size = 9
|
self.size = 9
|
||||||
self.komi = 3.75
|
self.komi = 3.75
|
||||||
self.board = [utils.EMPTY] * (self.size ** 2)
|
|
||||||
self.history = []
|
self.history = []
|
||||||
self.history_length = 8
|
self.history_length = 8
|
||||||
self.latest_boards = deque(maxlen=8)
|
|
||||||
for _ in range(8):
|
|
||||||
self.latest_boards.append(self.board)
|
|
||||||
self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role)
|
self.game_engine = go.Go(size=self.size, komi=self.komi, role=self.role)
|
||||||
|
self.board = [utils.EMPTY] * (self.size ** 2)
|
||||||
elif self.name == "reversi":
|
elif self.name == "reversi":
|
||||||
self.size = 8
|
self.size = 8
|
||||||
self.history_length = 1
|
self.history_length = 1
|
||||||
self.game_engine = reversi.Reversi()
|
self.history = []
|
||||||
|
self.game_engine = reversi.Reversi(size=self.size)
|
||||||
self.board = self.game_engine.get_board()
|
self.board = self.game_engine.get_board()
|
||||||
else:
|
else:
|
||||||
raise ValueError(name + " is an unknown game...")
|
raise ValueError(name + " is an unknown game...")
|
||||||
|
|
||||||
self.evaluator = model.ResNet(self.size, self.size ** 2 + 1, history_length=self.history_length)
|
self.evaluator = model.ResNet(self.size, self.size ** 2 + 1, history_length=self.history_length)
|
||||||
|
self.latest_boards = deque(maxlen=self.history_length)
|
||||||
|
for _ in range(self.history_length):
|
||||||
|
self.latest_boards.append(self.board)
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
|
if self.name == "go":
|
||||||
self.board = [utils.EMPTY] * (self.size ** 2)
|
self.board = [utils.EMPTY] * (self.size ** 2)
|
||||||
self.history = []
|
self.history = []
|
||||||
|
if self.name == "reversi":
|
||||||
|
self.board = self.game_engine.get_board()
|
||||||
for _ in range(self.history_length):
|
for _ in range(self.history_length):
|
||||||
self.latest_boards.append(self.board)
|
self.latest_boards.append(self.board)
|
||||||
|
|
||||||
@ -84,7 +88,7 @@ class Game:
|
|||||||
if self.name == "go":
|
if self.name == "go":
|
||||||
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
|
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
|
||||||
elif self.name == "reversi":
|
elif self.name == "reversi":
|
||||||
res = self.game_engine.executor_do_move(self.board, color, vertex)
|
res = self.game_engine.executor_do_move(self.history, self.latest_boards, self.board, color, vertex)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def think_play_move(self, color):
|
def think_play_move(self, color):
|
||||||
@ -110,13 +114,14 @@ class Game:
|
|||||||
if row[i] < 10:
|
if row[i] < 10:
|
||||||
print(' ', end='')
|
print(' ', end='')
|
||||||
for j in range(self.size):
|
for j in range(self.size):
|
||||||
print(self.status2symbol(self.board[self._flatten((j + 1, i + 1))]), end=' ')
|
print(self.status2symbol(self.board[self.game_engine._flatten((j + 1, i + 1))]), end=' ')
|
||||||
print('')
|
print('')
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
g = Game()
|
g = Game("go")
|
||||||
g.show_board()
|
print(g.board)
|
||||||
|
g.clear()
|
||||||
g.think_play_move(1)
|
g.think_play_move(1)
|
||||||
#file = open("debug.txt", "a")
|
#file = open("debug.txt", "a")
|
||||||
#file.write("mcts check\n")
|
#file.write("mcts check\n")
|
||||||
|
@ -212,12 +212,12 @@ class Go:
|
|||||||
|
|
||||||
def simulate_step_forward(self, state, action):
|
def simulate_step_forward(self, state, action):
|
||||||
# initialize the simulate_board from state
|
# initialize the simulate_board from state
|
||||||
history_boards, color = state
|
history_boards, color = copy.deepcopy(state)
|
||||||
if history_boards[-1] == history_boards[-2] and action is utils.PASS:
|
if history_boards[-1] == history_boards[-2] and action is utils.PASS:
|
||||||
return None, 2 * (float(self.executor_get_score(history_boards[-1]) > 0)-0.5) * color
|
return None, 2 * (float(self.executor_get_score(history_boards[-1]) > 0)-0.5) * color
|
||||||
else:
|
else:
|
||||||
vertex = self._action2vertex(action)
|
vertex = self._action2vertex(action)
|
||||||
new_board = self._do_move(copy.copy(history_boards[-1]), color, vertex)
|
new_board = self._do_move(copy.deepcopy(history_boards[-1]), color, vertex)
|
||||||
history_boards.append(new_board)
|
history_boards.append(new_board)
|
||||||
new_color = -color
|
new_color = -color
|
||||||
return [history_boards, new_color], 0
|
return [history_boards, new_color], 0
|
||||||
@ -227,8 +227,8 @@ class Go:
|
|||||||
return False
|
return False
|
||||||
current_board[self._flatten(vertex)] = color
|
current_board[self._flatten(vertex)] = color
|
||||||
self._process_board(current_board, color, vertex)
|
self._process_board(current_board, color, vertex)
|
||||||
history.append(copy.copy(current_board))
|
history.append(copy.deepcopy(current_board))
|
||||||
latest_boards.append(copy.copy(current_board))
|
latest_boards.append(copy.deepcopy(current_board))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _find_empty(self, current_board):
|
def _find_empty(self, current_board):
|
||||||
|
@ -173,10 +173,10 @@ class ResNet(object):
|
|||||||
"""
|
"""
|
||||||
state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1])
|
state = np.zeros([1, self.board_size, self.board_size, 2 * self.history_length + 1])
|
||||||
for i in range(self.history_length):
|
for i in range(self.history_length):
|
||||||
state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(self.board_size ** 2)).reshape(self.board_size,
|
state[0, :, :, i] = np.array(np.array(history[i]).flatten() == np.ones(self.board_size ** 2)).reshape(self.board_size,
|
||||||
self.board_size)
|
self.board_size)
|
||||||
state[0, :, :, i + self.history_length] = np.array(
|
state[0, :, :, i + self.history_length] = np.array(
|
||||||
np.array(history[i]) == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
|
np.array(history[i]).flatten() == -np.ones(self.board_size ** 2)).reshape(self.board_size, self.board_size)
|
||||||
# TODO: need a config to specify the BLACK and WHITE
|
# TODO: need a config to specify the BLACK and WHITE
|
||||||
if color == +1:
|
if color == +1:
|
||||||
state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size])
|
state[0, :, :, 2 * self.history_length] = np.ones([self.board_size, self.board_size])
|
||||||
@ -301,7 +301,7 @@ class ResNet(object):
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
new_board = copy.copy(board)
|
new_board = copy.deepcopy(board)
|
||||||
if new_board.ndim == 3:
|
if new_board.ndim == 3:
|
||||||
new_board = np.expand_dims(new_board, axis=0)
|
new_board = np.expand_dims(new_board, axis=0)
|
||||||
|
|
||||||
@ -331,7 +331,7 @@ class ResNet(object):
|
|||||||
:param orientation: an integer, which orientation to reflect
|
:param orientation: an integer, which orientation to reflect
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
new_board = copy.copy(board)
|
new_board = copy.deepcopy(board)
|
||||||
for _ in range(times):
|
for _ in range(times):
|
||||||
if orientation == 0:
|
if orientation == 0:
|
||||||
new_board = new_board[:, ::-1]
|
new_board = new_board[:, ::-1]
|
||||||
|
@ -89,7 +89,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
pattern = "[A-Z]{1}[0-9]{1}"
|
pattern = "[A-Z]{1}[0-9]{1}"
|
||||||
space = re.compile("\s+")
|
space = re.compile("\s+")
|
||||||
size = 9
|
size = {"go":9, "reversi":8}
|
||||||
show = ['.', 'X', 'O']
|
show = ['.', 'X', 'O']
|
||||||
|
|
||||||
evaluate_rounds = 1
|
evaluate_rounds = 1
|
||||||
@ -102,13 +102,13 @@ if __name__ == '__main__':
|
|||||||
pass_flag = [False, False]
|
pass_flag = [False, False]
|
||||||
print("Start game {}".format(game_num))
|
print("Start game {}".format(game_num))
|
||||||
# end the game if both players chose to pass, or play too many turns
|
# end the game if both players chose to pass, or play too many turns
|
||||||
while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2:
|
while not (pass_flag[0] and pass_flag[1]) and num < size["reversi"] ** 2 * 2:
|
||||||
turn = num % 2
|
turn = num % 2
|
||||||
board = player[turn].run_cmd(str(num) + ' show_board')
|
board = player[turn].run_cmd(str(num) + ' show_board')
|
||||||
board = eval(board[board.index('['):board.index(']') + 1])
|
board = eval(board[board.index('['):board.index(']') + 1])
|
||||||
for i in range(size):
|
for i in range(size["reversi"]):
|
||||||
for j in range(size):
|
for j in range(size["reversi"]):
|
||||||
print show[board[i * size + j]] + " ",
|
print show[board[i * size["reversi"] + j]] + " ",
|
||||||
print "\n",
|
print "\n",
|
||||||
data.boards.append(board)
|
data.boards.append(board)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
import copy
|
||||||
'''
|
'''
|
||||||
Settings of the Reversi game.
|
Settings of the Reversi game.
|
||||||
|
|
||||||
@ -8,13 +9,8 @@ Settings of the Reversi game.
|
|||||||
|
|
||||||
|
|
||||||
class Reversi:
|
class Reversi:
|
||||||
def __init__(self, black=None, white=None):
|
def __init__(self, **kwargs):
|
||||||
self.board = None # 8 * 8 board with 1 for black, -1 for white and 0 for blank
|
self.size = kwargs['size']
|
||||||
self.color = None # 1 for black and -1 for white
|
|
||||||
self.action = None # number in 0~63
|
|
||||||
self.winner = None
|
|
||||||
self.black_win = None
|
|
||||||
self.size = 8
|
|
||||||
|
|
||||||
def _deflatten(self, idx):
|
def _deflatten(self, idx):
|
||||||
x = idx // self.size + 1
|
x = idx // self.size + 1
|
||||||
@ -24,39 +20,39 @@ class Reversi:
|
|||||||
def _flatten(self, vertex):
|
def _flatten(self, vertex):
|
||||||
x, y = vertex
|
x, y = vertex
|
||||||
if (x == 0) and (y == 0):
|
if (x == 0) and (y == 0):
|
||||||
return 64
|
return self.size ** 2
|
||||||
return (x - 1) * self.size + (y - 1)
|
return (x - 1) * self.size + (y - 1)
|
||||||
|
|
||||||
def get_board(self, board=None):
|
def get_board(self):
|
||||||
self.board = board or np.zeros([8,8])
|
board = np.zeros([self.size, self.size], dtype=np.int32)
|
||||||
self.board[3, 3] = -1
|
board[self.size / 2 - 1, self.size / 2 - 1] = -1
|
||||||
self.board[4, 4] = -1
|
board[self.size / 2, self.size / 2] = -1
|
||||||
self.board[3, 4] = 1
|
board[self.size / 2 - 1, self.size / 2] = 1
|
||||||
self.board[4, 3] = 1
|
board[self.size / 2, self.size / 2 - 1] = 1
|
||||||
return self.board
|
return board
|
||||||
|
|
||||||
def _find_correct_moves(self, is_next=False):
|
def _find_correct_moves(self, board, color, is_next=False):
|
||||||
moves = []
|
moves = []
|
||||||
if is_next:
|
if is_next:
|
||||||
color = 0 - self.color
|
new_color = 0 - color
|
||||||
else:
|
else:
|
||||||
color = self.color
|
new_color = color
|
||||||
for i in range(64):
|
for i in range(self.size ** 2):
|
||||||
x, y = self._deflatten(i)
|
x, y = self._deflatten(i)
|
||||||
valid = self._is_valid(x - 1, y - 1, color)
|
valid = self._is_valid(board, x - 1, y - 1, new_color)
|
||||||
if valid:
|
if valid:
|
||||||
moves.append(i)
|
moves.append(i)
|
||||||
return moves
|
return moves
|
||||||
|
|
||||||
def _one_direction_valid(self, x, y, color):
|
def _one_direction_valid(self, board, x, y, color):
|
||||||
if (x >= 0) and (x < self.size):
|
if (x >= 0) and (x < self.size):
|
||||||
if (y >= 0) and (y < self.size):
|
if (y >= 0) and (y < self.size):
|
||||||
if self.board[x, y] == color:
|
if board[x, y] == color:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _is_valid(self, x, y, color):
|
def _is_valid(self, board, x, y, color):
|
||||||
if self.board[x, y]:
|
if board[x, y]:
|
||||||
return False
|
return False
|
||||||
for x_direction in [-1, 0, 1]:
|
for x_direction in [-1, 0, 1]:
|
||||||
for y_direction in [-1, 0, 1]:
|
for y_direction in [-1, 0, 1]:
|
||||||
@ -66,20 +62,18 @@ class Reversi:
|
|||||||
while True:
|
while True:
|
||||||
new_x += x_direction
|
new_x += x_direction
|
||||||
new_y += y_direction
|
new_y += y_direction
|
||||||
if self._one_direction_valid(new_x, new_y, 0 - color):
|
if self._one_direction_valid(board, new_x, new_y, 0 - color):
|
||||||
flag = 1
|
flag = 1
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
if self._one_direction_valid(new_x, new_y, color) and flag:
|
if self._one_direction_valid(board, new_x, new_y, color) and flag:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def simulate_get_mask(self, state, action_set):
|
def simulate_get_mask(self, state, action_set):
|
||||||
history_boards, color = state
|
history_boards, color = copy.deepcopy(state)
|
||||||
self.board = np.reshape(history_boards[-1], (self.size, self.size))
|
board = copy.deepcopy(history_boards[-1])
|
||||||
self.color = color
|
valid_moves = self._find_correct_moves(board, color)
|
||||||
valid_moves = self._find_correct_moves()
|
|
||||||
print(valid_moves)
|
|
||||||
if not len(valid_moves):
|
if not len(valid_moves):
|
||||||
invalid_action_mask = action_set[0:-1]
|
invalid_action_mask = action_set[0:-1]
|
||||||
else:
|
else:
|
||||||
@ -90,34 +84,34 @@ class Reversi:
|
|||||||
return invalid_action_mask
|
return invalid_action_mask
|
||||||
|
|
||||||
def simulate_step_forward(self, state, action):
|
def simulate_step_forward(self, state, action):
|
||||||
self.board = state[0].copy()
|
history_boards, color = copy.deepcopy(state)
|
||||||
self.board = np.reshape(self.board, (self.size, self.size))
|
board = copy.deepcopy(history_boards[-1])
|
||||||
self.color = state[1]
|
if action == self.size ** 2:
|
||||||
self.action = action
|
valid_moves = self._find_correct_moves(board, color, is_next=True)
|
||||||
if self.action == 64:
|
|
||||||
valid_moves = self._find_correct_moves(is_next=True)
|
|
||||||
if not len(valid_moves):
|
if not len(valid_moves):
|
||||||
self._game_over()
|
winner = self._get_winner(board)
|
||||||
return None, self.winner * self.color
|
return None, winner * color
|
||||||
else:
|
else:
|
||||||
return [self.board, 0 - self.color], 0
|
return [history_boards, 0 - color], 0
|
||||||
self._step()
|
new_board = self._step(board, color, action)
|
||||||
return [self.board, 0 - self.color], 0
|
history_boards.append(new_board)
|
||||||
|
return [history_boards, 0 - color], 0
|
||||||
|
|
||||||
def _game_over(self):
|
def _get_winner(self, board):
|
||||||
black_num, white_num = self._number_of_black_and_white()
|
black_num, white_num = self._number_of_black_and_white(board)
|
||||||
self.black_win = black_num - white_num
|
black_win = black_num - white_num
|
||||||
if self.black_win > 0:
|
if black_win > 0:
|
||||||
self.winner = 1
|
winner = 1
|
||||||
elif self.black_win < 0:
|
elif black_win < 0:
|
||||||
self.winner = -1
|
winner = -1
|
||||||
else:
|
else:
|
||||||
self.winner = 0
|
winner = 0
|
||||||
|
return winner
|
||||||
|
|
||||||
def _number_of_black_and_white(self):
|
def _number_of_black_and_white(self, board):
|
||||||
black_num = 0
|
black_num = 0
|
||||||
white_num = 0
|
white_num = 0
|
||||||
board_list = np.reshape(self.board, self.size ** 2)
|
board_list = np.reshape(board, self.size ** 2)
|
||||||
for i in range(len(board_list)):
|
for i in range(len(board_list)):
|
||||||
if board_list[i] == 1:
|
if board_list[i] == 1:
|
||||||
black_num += 1
|
black_num += 1
|
||||||
@ -125,19 +119,18 @@ class Reversi:
|
|||||||
white_num += 1
|
white_num += 1
|
||||||
return black_num, white_num
|
return black_num, white_num
|
||||||
|
|
||||||
def _step(self):
|
def _step(self, board, color, action):
|
||||||
if self.action < 0 or self.action > 63:
|
if action < 0 or action > self.size ** 2 - 1:
|
||||||
raise ValueError("Action not in the range of [0,63]!")
|
raise ValueError("Action not in the range of [0,63]!")
|
||||||
if self.action is None:
|
if action is None:
|
||||||
raise ValueError("Action is None!")
|
raise ValueError("Action is None!")
|
||||||
x, y = self._deflatten(self.action)
|
x, y = self._deflatten(action)
|
||||||
valid = self._flip(x -1, y - 1)
|
new_board = self._flip(board, x - 1, y - 1, color)
|
||||||
if not valid:
|
return new_board
|
||||||
raise ValueError("Illegal action!")
|
|
||||||
|
|
||||||
def _flip(self, x, y):
|
def _flip(self, board, x, y, color):
|
||||||
valid = 0
|
valid = 0
|
||||||
self.board[x, y] = self.color
|
board[x, y] = color
|
||||||
for x_direction in [-1, 0, 1]:
|
for x_direction in [-1, 0, 1]:
|
||||||
for y_direction in [-1, 0, 1]:
|
for y_direction in [-1, 0, 1]:
|
||||||
new_x = x
|
new_x = x
|
||||||
@ -146,47 +139,46 @@ class Reversi:
|
|||||||
while True:
|
while True:
|
||||||
new_x += x_direction
|
new_x += x_direction
|
||||||
new_y += y_direction
|
new_y += y_direction
|
||||||
if self._one_direction_valid(new_x, new_y, 0 - self.color):
|
if self._one_direction_valid(board, new_x, new_y, 0 - color):
|
||||||
flag = 1
|
flag = 1
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
if self._one_direction_valid(new_x, new_y, self.color) and flag:
|
if self._one_direction_valid(board, new_x, new_y, color) and flag:
|
||||||
valid = 1
|
valid = 1
|
||||||
flip_x = x
|
flip_x = x
|
||||||
flip_y = y
|
flip_y = y
|
||||||
while True:
|
while True:
|
||||||
flip_x += x_direction
|
flip_x += x_direction
|
||||||
flip_y += y_direction
|
flip_y += y_direction
|
||||||
if self._one_direction_valid(flip_x, flip_y, 0 - self.color):
|
if self._one_direction_valid(board, flip_x, flip_y, 0 - color):
|
||||||
self.board[flip_x, flip_y] = self.color
|
board[flip_x, flip_y] = color
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
if valid:
|
if valid:
|
||||||
return True
|
return board
|
||||||
else:
|
else:
|
||||||
return False
|
raise ValueError("Invalid action")
|
||||||
|
|
||||||
def executor_do_move(self, history, latest_boards, board, color, vertex):
|
def executor_do_move(self, history, latest_boards, board, color, vertex):
|
||||||
self.board = np.reshape(board, (self.size, self.size))
|
board = np.reshape(board, (self.size, self.size))
|
||||||
self.color = color
|
color = color
|
||||||
self.action = self._flatten(vertex)
|
action = self._flatten(vertex)
|
||||||
if self.action == 64:
|
if action == self.size ** 2:
|
||||||
valid_moves = self._find_correct_moves(is_next=True)
|
valid_moves = self._find_correct_moves(board, color, is_next=True)
|
||||||
if not len(valid_moves):
|
if not len(valid_moves):
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self._step()
|
new_board = self._step(board, color, action)
|
||||||
|
history.append(new_board)
|
||||||
|
latest_boards.append(new_board)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def executor_get_score(self, board):
|
def executor_get_score(self, board):
|
||||||
self.board = board
|
board = board
|
||||||
self._game_over()
|
winner = self._get_winner(board)
|
||||||
if self.black_win is not None:
|
return winner
|
||||||
return self.black_win
|
|
||||||
else:
|
|
||||||
raise ValueError("Game not finished!")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -110,15 +110,15 @@ class ActionNode(object):
|
|||||||
self.reward = 0
|
self.reward = 0
|
||||||
|
|
||||||
def type_conversion_to_tuple(self):
|
def type_conversion_to_tuple(self):
|
||||||
if type(self.next_state) is np.ndarray:
|
if isinstance(self.next_state, np.ndarray):
|
||||||
self.next_state = self.next_state.tolist()
|
self.next_state = self.next_state.tolist()
|
||||||
if type(self.next_state) is list:
|
if isinstance(self.next_state, list):
|
||||||
self.next_state = list2tuple(self.next_state)
|
self.next_state = list2tuple(self.next_state)
|
||||||
|
|
||||||
def type_conversion_to_origin(self):
|
def type_conversion_to_origin(self):
|
||||||
if self.state_type is np.ndarray:
|
if isinstance(self.state_type, np.ndarray):
|
||||||
self.next_state = np.array(self.next_state)
|
self.next_state = np.array(self.next_state)
|
||||||
if self.state_type is list:
|
if isinstance(self.state_type, np.ndarray):
|
||||||
self.next_state = tuple2list(self.next_state)
|
self.next_state = tuple2list(self.next_state)
|
||||||
|
|
||||||
def selection(self, simulator):
|
def selection(self, simulator):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user