Based on this article:
https://qiita.com/uezo/items/87b25c93199d72a56a9a
I am trying to implement this board game:
https://www.andchild.jp/products/detail/839

I have been debugging while running the code, but the following error occurs and I cannot tell what to fix. I suspect the size of QFunction's output layer and the type of `action` may be wrong, but if they are, I also do not know how to make them consistent with each other. Any help would be appreciated.
```
Traceback (most recent call last):
  File "C:\Users\User\AppData\Local\Programs\Python\Python38\lib\contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\utils\type_check.py", line 25, in get_function_check_context
    yield
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\function_node.py", line 455, in _check_data_type_forward
    self.check_type_forward(in_type)
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\functions\array\select_item.py", line 19, in check_type_forward
    type_check.expect(
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\utils\type_check.py", line 564, in expect
    expr.expect()
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\utils\type_check.py", line 495, in expect
    raise InvalidType(
chainer.utils.type_check.InvalidType:
Invalid operation is performed in: SelectItem (Forward)

Expect: t.ndim == 1
Actual: 2 != 1
```
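If I read the traceback correctly, the failure happens inside `F.select_item`, which DQN uses internally to pick out Q(s, a) for the actions actually taken, and which requires a 1-D array of integer action indices (one per state in the batch). The following minimal sketch (the array names are my own, not from my program) seems to reproduce the same shape complaint:

```python
import numpy as np
import chainer.functions as F

# Q-values for a batch of one state over 135 discrete actions.
q_values = np.random.rand(1, 135).astype(np.float32)

# What DQN expects internally: one integer action index per state (ndim == 1).
good_actions = np.array([42], dtype=np.int32)
F.select_item(q_values, good_actions)  # OK

# What a [unit, target] pair turns into after batching: shape (1, 2), ndim == 2.
pair_actions = np.array([[3, 7]], dtype=np.int32)
try:
    F.select_item(q_values, pair_actions)
except Exception as e:
    print(type(e).__name__)  # InvalidType: the same "t.ndim == 1, Actual: 2 != 1"
```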
Here is the full code:

```python
import random

import chainer
import chainer.functions as F
import chainer.links as L
import chainerrl
import numpy as np


class Board:
    def reset(self):
        # Board state: 9 squares x 3 stack levels x (size, owner) = 54 floats,
        # followed by each player's hand (6 pieces x (size, owner) = 12 floats).
        field = [0] * 54
        hand1 = [1, 1, 1, 1, 2, 1, 2, 1, 3, 1, 3, 1]
        hand2 = [1, -1, 1, -1, 2, -1, 2, -1, 3, -1, 3, -1]
        field.extend(hand1)
        field.extend(hand2)
        self.board = np.array(field, dtype=np.float32)
        self.winner = None
        self.missed = False
        self.done = False

    def getfield(self, field):
        squares = []
        for i in range(9):
            squares.append(field[6 * i : 6 * (i + 1)])
        return squares

    def move(self, unit, target, turn):
        pickup = []
        if unit == target or target not in range(1, 10):
            self.winner = turn * -1
            self.missed = True
            self.done = True
        if unit in range(1, 10):
            # Pick up the topmost own piece from a board square.
            field = self.board[:54]
            squares = self.getfield(field)
            square = squares[unit - 1]
            for i in range(3):
                if square[-(2 * i + 1)] == turn * -1:
                    self.winner = turn * -1
                    self.missed = True
                    self.done = True
                elif square[-(2 * i + 1)] == turn:
                    pickup.append(square[-(2 * i + 2)])
                    pickup.append(square[-(2 * i + 1)])
                    square[-(2 * i + 2)] = 0
                    square[-(2 * i + 1)] = 0
                elif i == 2:
                    self.winner = turn * -1
                    self.missed = True
                    self.done = True
        elif unit in range(10, 16):
            # Pick up a piece from the player's hand.
            hand1 = self.board[54:66]
            position = (unit - 10) * 2
            owner = hand1[position + 1]
            if owner != 1:
                self.winner = turn * -1
                self.missed = True
                self.done = True
            else:
                pickup.append(hand1[position])
                pickup.append(hand1[position + 1])
                hand1[position] = 0
                hand1[position + 1] = 0
        # Place the picked-up piece on the target square.
        field = self.board[:54]
        squares = self.getfield(field)
        square = squares[target - 1]
        if square[-2] != 0:
            self.winner = turn * -1
            self.missed = True
            self.done = True
        for i in range(1, 3):
            if (square[-(2 * i)] == 0) & (square[-(2 * (i + 1))] != 0):
                if square[-(2 * (i + 1))] < pickup[0]:
                    square[-(2 * i)] = pickup[0]
                    square[-(2 * i) + 1] = pickup[1]
                    break
                else:
                    self.winner = turn * -1
                    self.missed = True
                    self.done = True
                    break
            elif i == 2:
                square[0] = pickup[0]
                square[1] = pickup[1]
        self.check_winner()

    def checkuppersquare(self, square):
        # Return [size, owner] of the topmost piece on a square, or None if empty.
        for i in range(3):
            if square[-(2 * (i + 1)) + 1] != 0:
                return [square[-(2 * (i + 1))], square[-(2 * (i + 1)) + 1]]
        return None

    def check_winner(self):
        win_conditions = ((0, 1, 2), (3, 4, 5), (6, 7, 8),
                          (0, 3, 6), (1, 4, 7), (2, 5, 8),
                          (0, 4, 8), (2, 4, 6))
        field = self.board[:54]
        squares = self.getfield(field)
        for cond in win_conditions:
            one = self.checkuppersquare(squares[cond[0]])
            two = self.checkuppersquare(squares[cond[1]])
            three = self.checkuppersquare(squares[cond[2]])
            if (one is not None) & (two is not None) & (three is not None):
                if one[1] == two[1] == three[1]:
                    self.winner = one[1]
                    self.done = True
                    return

    def get_empty_pos(self):
        # Collect every piece the current player can pick up ...
        field = self.board[:54]
        squares = self.getfield(field)
        pickableindex = []
        for i in range(9):
            upperunit = self.checkuppersquare(squares[i])
            if upperunit is not None:
                if upperunit[1] == 1:
                    pickableindex.append([i + 1, upperunit[0]])
        hand1 = self.board[54:66]
        for i in range(6):
            if hand1[2 * i + 1] == 1:
                pickableindex.append([i + 10, hand1[2 * i]])
        # ... then every square it can legally be placed on, and return a
        # random legal [unit, target] pair.
        playable = []
        for pick in pickableindex:
            for j in range(9):
                field = self.board[:54]
                squares = self.getfield(field)
                square = squares[j - 1]
                if square[-2] != 0:
                    continue
                for i in range(1, 3):
                    if (square[-(2 * i)] == 0) & (square[-(2 * (i + 1))] != 0):
                        if square[-(2 * (i + 1))] < pick[1]:
                            playable.append([pick[0], j + 1])
                        else:
                            break
                    elif i == 2:
                        playable.append([pick[0], j + 1])
        return random.choice(playable)

    def show(self):
        pass  # omitted


class RandomActor:
    def __init__(self, board):
        self.board = board
        self.random_count = 0

    def random_action_func(self):
        self.random_count += 1
        return self.board.get_empty_pos()


class QFunction(chainer.Chain):
    def __init__(self, obs_size, n_actions, n_hidden_channels):
        super().__init__(
            l0=L.Linear(obs_size, n_hidden_channels),
            l1=L.Linear(n_hidden_channels, n_hidden_channels),
            l2=L.Linear(n_hidden_channels, n_hidden_channels),
            l3=L.Linear(n_hidden_channels, n_actions))

    def __call__(self, x, test=False):
        h = F.leaky_relu(self.l0(x))
        h = F.leaky_relu(self.l1(h))
        h = F.leaky_relu(self.l2(h))
        return chainerrl.action_value.DiscreteActionValue(self.l3(h))


b = Board()
ra = RandomActor(b)
obs_size = (2 * 3 * 9) + (2 * 6) * 2
n_actions = (9 + 6) * 9
q_func = QFunction(obs_size, n_actions, obs_size * n_actions)
optimizer = chainer.optimizers.Adam(eps=1e-2)
optimizer.setup(q_func)
gamma = 0.95
explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
    start_epsilon=1.0, end_epsilon=0.3, decay_steps=50000,
    random_action_func=ra.random_action_func)
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10 ** 6)
agent_p1 = chainerrl.agents.DoubleDQN(
    q_func, optimizer, replay_buffer, gamma, explorer,
    replay_start_size=500,  # update_frequency=1,
    target_update_frequency=100)
agent_p2 = chainerrl.agents.DoubleDQN(
    q_func, optimizer, replay_buffer, gamma, explorer,
    replay_start_size=500,  # update_frequency=1,
    target_update_frequency=100)

n_episodes = 20000
miss = 0
win = 0
draw = 0
for i in range(1, n_episodes + 1):
    b.reset()
    reward = 0
    agents = [agent_p1, agent_p2]
    turn = np.random.choice([0, 1])
    last_state = None
    while not b.done:
        action = agents[turn].act_and_train(b.board.copy(), reward)
        b.move(action[0], action[1], 1)
        if b.done:
            if b.winner == 1:
                reward = 1
                win += 1
            elif b.winner == 0:
                draw += 1
            else:
                reward = -1
            if b.missed is True:
                miss += 1
            agents[turn].stop_episode_and_train(b.board.copy(), reward, True)
            if agents[1 if turn == 0 else 0].last_state is not None and b.missed is False:
                agents[1 if turn == 0 else 0].stop_episode_and_train(
                    last_state, reward * -1, True)
        else:
            last_state = b.board.copy()
            # Swap the hands and flip the owner signs so the board is always
            # seen from the current player's perspective.
            tmp = b.board[54:66].copy()
            b.board[54:66] = b.board[66:]
            b.board[66:] = tmp
            for idx in range(len(b.board)):
                if (idx % 2 != 0) & (b.board[idx] != 0):
                    b.board[idx] = b.board[idx] * -1
            turn = 1 if turn == 0 else 0
    if i % 100 == 0:
        miss = 0
        win = 0
        draw = 0
        ra.random_count = 0
    if i % 10000 == 0:
        agent_p1.save("result_" + str(i))

print("Training finished.")

# Human player: omitted
# Evaluation: omitted
```
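For reference, my current guess is that the agent's action needs to be a single integer in `range(n_actions)` rather than the `[unit, target]` list that `get_empty_pos()` returns: `n_actions = (9 + 6) * 9 = 135` already enumerates every (piece, destination) pair, so the two values could be folded into one index. This is only a sketch of what I imagine the mapping would look like (`encode_action` / `decode_action` are names I made up, not part of ChainerRL):

```python
N_TARGETS = 9  # nine destination squares on the 3x3 board

def encode_action(unit, target):
    # unit: 1..15 (board squares 1-9, hand slots 10-15); target: 1..9.
    # Produces a scalar index in range(135), matching n_actions above.
    return (unit - 1) * N_TARGETS + (target - 1)

def decode_action(action):
    # Inverse mapping: recover the (unit, target) pair from the scalar index.
    unit_idx, target_idx = divmod(int(action), N_TARGETS)
    return unit_idx + 1, target_idx + 1

assert decode_action(encode_action(15, 9)) == (15, 9)
assert encode_action(15, 9) == 134  # largest index, fits n_actions = 135
```

With that, `random_action_func` would return `encode_action(*self.board.get_empty_pos())`, and the training loop would call `b.move(*decode_action(action), 1)` instead of `b.move(action[0], action[1], 1)`. Is this the right direction?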