Based on this article:
https://qiita.com/uezo/items/87b25c93199d72a56a9a
I am trying to implement this board game:
https://www.andchild.jp/products/detail/839

I have been debugging while running the code, but the following error occurs and I cannot tell what to fix. I suspect the size of QFunction's output layer and the type of `action` may be wrong, but if they are, I also do not know how to make them consistent with each other. Any help would be appreciated.
```
Traceback (most recent call last):
  File "C:\Users\User\AppData\Local\Programs\Python\Python38\lib\contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\utils\type_check.py", line 25, in get_function_check_context
    yield
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\function_node.py", line 455, in _check_data_type_forward
    self.check_type_forward(in_type)
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\functions\array\select_item.py", line 19, in check_type_forward
    type_check.expect(
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\utils\type_check.py", line 564, in expect
    expr.expect()
  File "C:\Users\User\PycharmProjects\pythonProject\venv\lib\site-packages\chainer\utils\type_check.py", line 495, in expect
    raise InvalidType(
chainer.utils.type_check.InvalidType:
Invalid operation is performed in: SelectItem (Forward)

Expect: t.ndim == 1
Actual: 2 != 1
```
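If I read the traceback correctly, the failure happens inside `F.select_item`, which DQN uses internally to pick out Q(s, a) for the actions actually taken, and which requires a 1-D array of integer action indices (one per state in the batch). The following minimal sketch (the array names are my own, not from my program) seems to reproduce the same shape complaint:

```python
import numpy as np
import chainer.functions as F

# Q-values for a batch of one state over 135 discrete actions.
q_values = np.random.rand(1, 135).astype(np.float32)

# What DQN expects internally: one integer action index per state (ndim == 1).
good_actions = np.array([42], dtype=np.int32)
F.select_item(q_values, good_actions)  # OK

# What a [unit, target] pair turns into after batching: shape (1, 2), ndim == 2.
pair_actions = np.array([[3, 7]], dtype=np.int32)
try:
    F.select_item(q_values, pair_actions)
except Exception as e:
    print(type(e).__name__)  # InvalidType: the same "t.ndim == 1, Actual: 2 != 1"
```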
Here is the full code:

```python
import random

import chainer
import chainer.functions as F
import chainer.links as L
import chainerrl
import numpy as np


class Board:
    def reset(self):
        # Board state: 9 squares x 3 stack levels x (size, owner) = 54 floats,
        # followed by each player's hand (6 pieces x (size, owner) = 12 floats).
        field = [0] * 54
        hand1 = [1, 1, 1, 1, 2, 1, 2, 1, 3, 1, 3, 1]
        hand2 = [1, -1, 1, -1, 2, -1, 2, -1, 3, -1, 3, -1]
        field.extend(hand1)
        field.extend(hand2)
        self.board = np.array(field, dtype=np.float32)
        self.winner = None
        self.missed = False
        self.done = False

    def getfield(self, field):
        squares = []
        for i in range(9):
            squares.append(field[6 * i : 6 * (i + 1)])
        return squares

    def move(self, unit, target, turn):
        pickup = []
        if unit == target or target not in range(1, 10):
            self.winner = turn * -1
            self.missed = True
            self.done = True
        if unit in range(1, 10):
            # Pick up the topmost own piece from a board square.
            field = self.board[:54]
            squares = self.getfield(field)
            square = squares[unit - 1]
            for i in range(3):
                if square[-(2 * i + 1)] == turn * -1:
                    self.winner = turn * -1
                    self.missed = True
                    self.done = True
                elif square[-(2 * i + 1)] == turn:
                    pickup.append(square[-(2 * i + 2)])
                    pickup.append(square[-(2 * i + 1)])
                    square[-(2 * i + 2)] = 0
                    square[-(2 * i + 1)] = 0
                elif i == 2:
                    self.winner = turn * -1
                    self.missed = True
                    self.done = True
        elif unit in range(10, 16):
            # Pick up a piece from the player's hand.
            hand1 = self.board[54:66]
            position = (unit - 10) * 2
            owner = hand1[position + 1]
            if owner != 1:
                self.winner = turn * -1
                self.missed = True
                self.done = True
            else:
                pickup.append(hand1[position])
                pickup.append(hand1[position + 1])
                hand1[position] = 0
                hand1[position + 1] = 0
        # Place the picked-up piece on the target square.
        field = self.board[:54]
        squares = self.getfield(field)
        square = squares[target - 1]
        if square[-2] != 0:
            self.winner = turn * -1
            self.missed = True
            self.done = True
        for i in range(1, 3):
            if (square[-(2 * i)] == 0) & (square[-(2 * (i + 1))] != 0):
                if square[-(2 * (i + 1))] < pickup[0]:
                    square[-(2 * i)] = pickup[0]
                    square[-(2 * i) + 1] = pickup[1]
                    break
                else:
                    self.winner = turn * -1
                    self.missed = True
                    self.done = True
                    break
            elif i == 2:
                square[0] = pickup[0]
                square[1] = pickup[1]
        self.check_winner()

    def checkuppersquare(self, square):
        # Return [size, owner] of the topmost piece on a square, or None if empty.
        for i in range(3):
            if square[-(2 * (i + 1)) + 1] != 0:
                return [square[-(2 * (i + 1))], square[-(2 * (i + 1)) + 1]]
        return None

    def check_winner(self):
        win_conditions = ((0, 1, 2), (3, 4, 5), (6, 7, 8),
                          (0, 3, 6), (1, 4, 7), (2, 5, 8),
                          (0, 4, 8), (2, 4, 6))
        field = self.board[:54]
        squares = self.getfield(field)
        for cond in win_conditions:
            one = self.checkuppersquare(squares[cond[0]])
            two = self.checkuppersquare(squares[cond[1]])
            three = self.checkuppersquare(squares[cond[2]])
            if (one is not None) & (two is not None) & (three is not None):
                if one[1] == two[1] == three[1]:
                    self.winner = one[1]
                    self.done = True
                    return

    def get_empty_pos(self):
        # Collect every piece the current player can pick up ...
        field = self.board[:54]
        squares = self.getfield(field)
        pickableindex = []
        for i in range(9):
            upperunit = self.checkuppersquare(squares[i])
            if upperunit is not None:
                if upperunit[1] == 1:
                    pickableindex.append([i + 1, upperunit[0]])
        hand1 = self.board[54:66]
        for i in range(6):
            if hand1[2 * i + 1] == 1:
                pickableindex.append([i + 10, hand1[2 * i]])
        # ... then every square it can legally be placed on, and return a
        # random legal [unit, target] pair.
        playable = []
        for pick in pickableindex:
            for j in range(9):
                field = self.board[:54]
                squares = self.getfield(field)
                square = squares[j - 1]
                if square[-2] != 0:
                    continue
                for i in range(1, 3):
                    if (square[-(2 * i)] == 0) & (square[-(2 * (i + 1))] != 0):
                        if square[-(2 * (i + 1))] < pick[1]:
                            playable.append([pick[0], j + 1])
                        else:
                            break
                    elif i == 2:
                        playable.append([pick[0], j + 1])
        return random.choice(playable)

    def show(self):
        pass  # omitted


class RandomActor:
    def __init__(self, board):
        self.board = board
        self.random_count = 0

    def random_action_func(self):
        self.random_count += 1
        return self.board.get_empty_pos()


class QFunction(chainer.Chain):
    def __init__(self, obs_size, n_actions, n_hidden_channels):
        super().__init__(
            l0=L.Linear(obs_size, n_hidden_channels),
            l1=L.Linear(n_hidden_channels, n_hidden_channels),
            l2=L.Linear(n_hidden_channels, n_hidden_channels),
            l3=L.Linear(n_hidden_channels, n_actions))

    def __call__(self, x, test=False):
        h = F.leaky_relu(self.l0(x))
        h = F.leaky_relu(self.l1(h))
        h = F.leaky_relu(self.l2(h))
        return chainerrl.action_value.DiscreteActionValue(self.l3(h))


b = Board()
ra = RandomActor(b)
obs_size = (2 * 3 * 9) + (2 * 6) * 2
n_actions = (9 + 6) * 9
q_func = QFunction(obs_size, n_actions, obs_size * n_actions)
optimizer = chainer.optimizers.Adam(eps=1e-2)
optimizer.setup(q_func)
gamma = 0.95
explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
    start_epsilon=1.0, end_epsilon=0.3, decay_steps=50000,
    random_action_func=ra.random_action_func)
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10 ** 6)
agent_p1 = chainerrl.agents.DoubleDQN(
    q_func, optimizer, replay_buffer, gamma, explorer,
    replay_start_size=500,  # update_frequency=1,
    target_update_frequency=100)
agent_p2 = chainerrl.agents.DoubleDQN(
    q_func, optimizer, replay_buffer, gamma, explorer,
    replay_start_size=500,  # update_frequency=1,
    target_update_frequency=100)

n_episodes = 20000
miss = 0
win = 0
draw = 0
for i in range(1, n_episodes + 1):
    b.reset()
    reward = 0
    agents = [agent_p1, agent_p2]
    turn = np.random.choice([0, 1])
    last_state = None
    while not b.done:
        action = agents[turn].act_and_train(b.board.copy(), reward)
        b.move(action[0], action[1], 1)
        if b.done:
            if b.winner == 1:
                reward = 1
                win += 1
            elif b.winner == 0:
                draw += 1
            else:
                reward = -1
            if b.missed is True:
                miss += 1
            agents[turn].stop_episode_and_train(b.board.copy(), reward, True)
            if agents[1 if turn == 0 else 0].last_state is not None and b.missed is False:
                agents[1 if turn == 0 else 0].stop_episode_and_train(
                    last_state, reward * -1, True)
        else:
            last_state = b.board.copy()
            # Swap the hands and flip the owner signs so the board is always
            # seen from the current player's perspective.
            tmp = b.board[54:66].copy()
            b.board[54:66] = b.board[66:]
            b.board[66:] = tmp
            for idx in range(len(b.board)):
                if (idx % 2 != 0) & (b.board[idx] != 0):
                    b.board[idx] = b.board[idx] * -1
            turn = 1 if turn == 0 else 0
    if i % 100 == 0:
        miss = 0
        win = 0
        draw = 0
        ra.random_count = 0
    if i % 10000 == 0:
        agent_p1.save("result_" + str(i))

print("Training finished.")

# Human player: omitted
# Evaluation: omitted
```
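For reference, my current guess is that the agent's action needs to be a single integer in `range(n_actions)` rather than the `[unit, target]` list that `get_empty_pos()` returns: `n_actions = (9 + 6) * 9 = 135` already enumerates every (piece, destination) pair, so the two values could be folded into one index. This is only a sketch of what I imagine the mapping would look like (`encode_action` / `decode_action` are names I made up, not part of ChainerRL):

```python
N_TARGETS = 9  # nine destination squares on the 3x3 board

def encode_action(unit, target):
    # unit: 1..15 (board squares 1-9, hand slots 10-15); target: 1..9.
    # Produces a scalar index in range(135), matching n_actions above.
    return (unit - 1) * N_TARGETS + (target - 1)

def decode_action(action):
    # Inverse mapping: recover the (unit, target) pair from the scalar index.
    unit_idx, target_idx = divmod(int(action), N_TARGETS)
    return unit_idx + 1, target_idx + 1

assert decode_action(encode_action(15, 9)) == (15, 9)
assert encode_action(15, 9) == 134  # largest index, fits n_actions = 135
```

With that, `random_action_func` would return `encode_action(*self.board.get_empty_pos())`, and the training loop would call `b.move(*decode_action(action), 1)` instead of `b.move(action[0], action[1], 1)`. Is this the right direction?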