list index out of rangeが解決できない。ループが止まらない。

実現したいこと

ポケモンのalphazero(モンテカルロ、deeplearning、強化学習)を作っています。エラーを解決してバトルを学習してもらいたいです。

発生している問題・分からないこと

child_nodesに存在しない引数が渡されてしまっている
。ループが止まらない（コメント追記）

エラーメッセージ

error
1(省略、文字数制限のためコメントに記載print文の実行結果など)
2---------------------------------------------------------------------------
3IndexError                                Traceback (most recent call last)
4Cell In[18], line 44
5     42         c2hp=player1[0].actual_hp
6     43     result=((c1,-1,-1,c1hp),(c2,-1,-1,c2hp))
7---> 44     next_action=action1(result)
8     45 winner = battle.get_winner()
9     46 #ゲーム終了時
10
11Cell In[16], line 120, in pv_mcts_action.<locals>.pv_mcts_action(state)
12    119 def pv_mcts_action(state):
13--> 120     scores = pv_mcts_scores(model, state, temperature,winner)
14    121     rng=np.random.default_rng()
15    122     return rng.choice([0,1,2,3], p=scores)
16
17Cell In[16], line 105, in pv_mcts_scores(model, state, temperature, winner)
18    103 # 複数回の評価の実行
19    104 for _ in range(PV_EVALUATE_COUNT):
20--> 105     root_node.evaluate()
21    107 # 合法手の確率分布
22    108 scores = nodes_to_scores(root_node.child_nodes)
23
24Cell In[16], line 67, in pv_mcts_scores.<locals>.Node.evaluate(self)
25     62     return value
26     64 # 子ノードが存在する時
27     65 else:
28     66     # アーク評価値が最大の子ノードの評価で価値を取得
29---> 67     value = self.next_child_node().evaluate()
30     69     # 累計価値と試行回数の更新
31     70     self.w += value
32
33Cell In[16], line 98, in pv_mcts_scores.<locals>.Node.next_child_node(self)
34     96 print("len(self.child_nodes)",len(self.child_nodes))
35     97 print("self.child_nodes",self.child_nodes)
36---> 98 return self.child_nodes[a]
37
38IndexError: list index out of range

該当のソースコード

python
1from dual_network import DN_INPUT_SHAPE
2from math import sqrt
3from tensorflow.keras.models import load_model
4from pathlib import Path
5import numpy as np
6import battle
7from battle import Battle
8import pokedex as p
9import moves as m
10
11# パラメータの準備
12PV_EVALUATE_COUNT = 50 # 1推論あたりのシミュレーション回数（本家は1600）
13
14# 推論
15def predict(model, state):
16    # 推論のための入力データのシェイプの変換
17    x=np.array(state)
18    x=x.reshape(1,4,2)
19
20    # 推論
21    y=model.predict(x,batch_size=1)
22
23    # 方策の取得
24    policies=y[0][0:4]
25    
26    # 価値の取得
27    value=y[1][0]
28
29    return policies, value    
30
31# ノードのリストを試行回数のリストに変換
32def nodes_to_scores(nodes):
33    scores = []
34    for c in nodes:
35        scores.append(c.n)
36    return scores
37
38# モンテカルロ木探索のスコアの取得
39#def pv_mcts_scores(model, p1_is,p1_mae_action,p1_took_damage,p1_nokorihp,p1_is,p2_mae_action,p2_took_damage,p2_nokorihp, temperature): #stateに8つの状態
40def pv_mcts_scores(model, state, temperature,winner=None): #stateに8つの状態
41# モンテカルロ木探索のノードの定義
42    class Node:
43        player1=[
44            p.Jolteon([m.BodySlam(),m.DoubleKick(),m.PinMissle(),m.Thunderbolt()])
45                ]
46
47        player2=[
48            p.Rhydon([m.Earthquake(), m.RockSlide(), m.Surf(), m.BodySlam()])
49                ]
50        
51        # ノードの初期化
52        def __init__(self, state, p,winner):
53            self.state = state # 状態
54            self.p = p # 方策
55            self.w = 0 # 累計価値
56            self.n = 0 # 試行回数
57            self.winner=winner
58            self.child_nodes = None  # 子ノード群
59            (self.p1_is,self.p1_mae_action,self.p1_took_damage,self.p1_nokorihp),(self.p1_is,self.p2_mae_action,self.p2_took_damage,self.p2_nokorihp)=state
60            self.turn=0
61            
62        # 局面の価値の計算
63        def evaluate(self): #Battle が入る
64            # ゲーム終了時
65            if self.winner is not None:
66                # 勝敗結果で価値を取得
67                #print("hplen",len(self.p1_nokorihp))
68                battle=Battle(player1,player2)
69                value = 0 if self.winner == player1 else -1
70
71                # 累計価値と試行回数の更新
72                self.w += value
73                self.n += 1
74                return value
75
76            # 子ノードが存在しない時
77            if not self.child_nodes:
78                # ニューラルネットワークの推論で方策と価値を取得
79                policies, value = predict(model, self.state)
80
81                print("policies",policies)
82                print("value",value)
83
84                # 累計価値と試行回数の更新
85                self.w += value
86                self.n += 1
87
88                
89                # 子ノードの展開
90                self.child_nodes = []
91                a=[6,7,8,9]
92                for action, policy in zip(a, policies):
93                    battle=Battle(player1,player2)
94                    zyoutai=battle.forward_step(self.p1_nokorihp,self.p2_nokorihp,action)
95                    winner = battle.get_winner()
96                    self.child_nodes.append(Node(zyoutai, policy,winner))
97
98
99                return value
100
101            # 子ノードが存在する時
102            else:
103                # アーク評価値が最大の子ノードの評価で価値を取得
104                value = self.next_child_node().evaluate()
105
106                # 累計価値と試行回数の更新
107                self.w += value
108                self.n += 1
109                return value
110
111        # アーク評価値が最大の子ノードを取得
112        def next_child_node(self):
113            # アーク評価値の計算
114            C_PUCT = 1.0
115            t = sum(nodes_to_scores(self.child_nodes))
116            pucb_values = []
117            print("前 child_nodes",len(self.child_nodes))
118            for child_node in self.child_nodes:
119                print("child_node.p",child_node.p)
120                pucb_values.append((-child_node.w / child_node.n if child_node.n else 0.0) +
121                    C_PUCT * child_node.p * sqrt(t) / (1 + child_node.n))
122                self.turn+=1
123
124            # アーク評価値が最大の子ノードを返す
125            print("argmax",np.argmax(pucb_values))
126            print("turn",self.turn)
127            print("len(pucb_values)",len(pucb_values))
128            print("pucb_values",pucb_values)
129            index=np.argmax(pucb_values)
130            a = index.item()
131            print("index",type(index))
132            print("index",index)
133            print("len(self.child_nodes)",len(self.child_nodes))
134            print("self.child_nodes",self.child_nodes)
135            return self.child_nodes[a]
136
137    # 現在の局面のノードの作成
138    root_node = Node(state, 0,winner)
139
140    # 複数回の評価の実行
141    for _ in range(PV_EVALUATE_COUNT):
142        root_node.evaluate()
143
144    # 合法手の確率分布
145    scores = nodes_to_scores(root_node.child_nodes)
146    if temperature == 0: # 最大値のみ1
147        action = np.argmax(scores)
148        scores = np.zeros(len(scores))
149        scores[action] = 1
150    else: # ボルツマン分布でバラつき付加
151        scores = boltzman(scores, temperature)
152    return scores
153
154# モンテカルロ木探索で行動選択
155def pv_mcts_action(model, temperature=0):
156    def pv_mcts_action(state):
157        scores = pv_mcts_scores(model, state, temperature,winner)
158        rng=np.random.default_rng()
159        return rng.choice([0,1,2,3], p=scores)
160    return pv_mcts_action
161
162# ボルツマン分布
163def boltzman(xs, temperature):
164    xs = [x ** (1 / temperature) for x in xs]
165    return [x / sum(xs) for x in xs]
166
167
168import moves as m
169import pokedex as p
170from damage import calculate_damage
171
172# 動作確認
173if __name__ == '__main__':
174    # モデルの読み込み
175    path = sorted(Path('./model').glob('*.h5'))[-1]
176    model = load_model(str(path))
177    winner=None
178    # 状態の生成
179    player1=[
180        p.Jolteon([m.BodySlam(),m.DoubleKick(),m.PinMissle(),m.Thunderbolt()])
181        ]
182
183    player2=[
184        p.Rhydon([m.Earthquake(), m.RockSlide(), m.Surf(), m.BodySlam()])
185        ]
186
187    battle=Battle(player1,player2)
188
189    # モンテカルロ木探索で行動取得を行う関数の生成
190    action1 = pv_mcts_action(model, 1.0)
191
192    result=None
193    while True:
194        if result is not None:
195            if winner is not None:
196                print("バトルは終了しました")
197                break
198            else:
199                result=battle.forward_step(action=next_action)
200                next_action=action1(result)
201        else:
202            #１番目(resultない)
203            #result= battle.forward_step()
204            if player1[0].spe > player2[0].spe:
205                c1=1
206                c2=0
207                c1hp=player1[0].actual_hp
208                c2hp=player2[0].actual_hp
209            else:
210                c1=0
211                c2=1
212                c1hp=player2[0].actual_hp
213                c2hp=player1[0].actual_hp
214            result=((c1,-1,-1,c1hp),(c2,-1,-1,c2hp))
215            next_action=action1(result)
216        winner = battle.get_winner()
217        #ゲーム終了時
218        if winner is not None or battle.turn > 500:
219            break

試したこと・調べたこと

teratailやGoogle等で検索した
ソースコードを自分なりに変更した
知人に聞いた
その他

上記の詳細・結果

print文を追加し、child_nodesに存在しない引数が渡されてしまっていることがわかりました。

補足

参考　AlphaZero 深層学習・強化学習・探索人工知能プログラミング実践入門
macM1
jupyter notebook
質問の内容が違うことがわかったので新しく質問をしました。
https://teratail.com/questions/6grhjidtt878w4

miraimirai

2024/06/12 23:56

エラー省略部分 WARNING:tensorflow:No training configuration found in the save file, so the model was *not* compiled. Compile it manually. 1/1 [==============================] - 0s 104ms/step policies [[1.0365372e-36 1.4505721e-18 1.0000000e+00 1.6264764e-22]] value [1.] battle実行されました 0 3 モンテカルロ 6 c1 (Jolteon(271), BodySlam, [Jolteon(271)]) サンダースが技のしかかりをつかった! こうかはいまひとつ... 0.5 サイドンが21をうけた ❤️ サイドン残りHP 330 サイドン技のしかかりを使った! サンダースが123を受けた ⭐️ サンダース残りHP 148 前 child_nodes 1 child_node.p [1.0365372e-36 1.4505721e-18 1.0000000e+00 1.6264764e-22] argmax 0 turn 1 len(pucb_values) 1 pucb_values [array([0., 0., 0., 0.], dtype=float32)] index <class 'numpy.int64'> index 0 len(self.child_nodes) 1 self.child_nodes [<__main__.pv_mcts_scores.<locals>.Node object at 0x163bf6610>] 1/1 [==============================] - 0s 8ms/step policies [[1.1556966e-29 1.0000000e+00 1.6387254e-16 4.5036393e-24]] value [1.] battle実行されました 0 3 モンテカルロ 6 c1 (Jolteon(148), BodySlam, [Jolteon(148)]) サンダースが技のしかかりをつかった! こうかはいまひとつ... 0.5 サイドンが20をうけた ❤️ サイドン残りHP 310 サイドン技のしかかりを使った! サンダースが127を受けた ⭐️ サンダース残りHP 21 前 child_nodes 1 child_node.p [1.0365372e-36 1.4505721e-18 1.0000000e+00 1.6264764e-22] argmax 2 turn 2 len(pucb_values) 1 pucb_values [array([-1. , -1. , -0.5, -1. ], dtype=float32)] index <class 'numpy.int64'> index 2 len(self.child_nodes) 1 self.child_nodes [<__main__.pv_mcts_scores.<locals>.Node object at 0x163bf6610>]

miraimirai

2024/06/12 23:59

文字数が上限に達したのでこちらに追記します。クラスbattle、インデントがなくなってしまい申し訳ないです。 import moves as m import pokedex as p from damage import calculate_damage from random import randint def choose_action(self) :#ランダム攻撃 return randint(6,9) #raise NotImplementedError def get_spe_ordered_pokemon(c1, c2) : if c1[0].spe > c2[0].spe: return (c1, c2) if c1[0].spe < c2[0].spe: return (c2, c1) return (c1, c2) if random() > 0.5 else (c2, c1) def get_available_pokemons(pokemons): return [p for p in pokemons if p.actual_hp > 0] def is_dead(pokemons): return len(get_available_pokemons(pokemons)) == 0 def is_move(action) -> bool: return action >= 6 class Battle: def __init__(self,player1,player2): self.player1=player1 self.player2=player2 self.turn = 0 def forward_step(self,hp1=None,hp2=None,action=None): print("battle実行されました") self.turn += 1 if action is not None: action1=action else: #１回目の時 if(action=-1) action1 = choose_action(self.player2) action2 = choose_action(self.player1) #result=(action1,action2,player1_took_damage,player2_took_damage) active_pokemon1=self.player1[0] active_pokemon2=self.player2[0] print(action1 - 6) print(action2 - 6) print("モンテカルロ",action) if is_move(action1) and is_move(action2): # to handle both player choose a move c1, c2 = get_spe_ordered_pokemon( (active_pokemon1, active_pokemon1.actual_moves[action1 - 6], self.player1), (active_pokemon2, active_pokemon2.actual_moves[action2 - 6], self.player2), ) print("c1",c1) #print(f"{c1[2]}'の {c1[0]} が技{c1[1]}をつかった!") print(f"{c1[0].name_ja} が技{c1[1].name_ja}をつかった!") damage = calculate_damage(c1[0], c2[0], c1[1]) took_damage1=damage if hp1 is not None and c1 == self.player1: c1[0].actual_hp=hp1 c2[0].actual_hp=hp2 elif hp1 is not None and c1 == self.player2: c2[0].actual_hp=hp1 c1[0].actual_hp=hp2 c2[0].actual_hp -= damage #print(f"{c2[2]}の {c2[0]} が{damage}をうけた") print(f"{c2[0].name_ja} が{damage}をうけた") print("❤️",c2[0].name_ja,"残りHP",c2[0].actual_hp) if c2[0].actual_hp > 0: print(f"{c2[0].name_ja}技{c2[1].name_ja}を使った!") damage = calculate_damage(c2[0], c1[0], c2[1]) took_damage2=damage c1[0].actual_hp -= damage print(f"{c1[0].name_ja}が{damage}を受けた") print("⭐️",c1[0].name_ja,"残りHP",c1[0].actual_hp) if c1 == self.player1: return ((0,action1,took_damage1,c1[0].actual_hp),(1,action2,took_damage2,c2[0].actual_hp)) else: return ((0,action2,took_damage2,c2[0].actual_hp),(1,action1,took_damage1,c1[0].actual_hp)) def get_winner(self): if is_dead(self.player1): return self.player2 if is_dead(self.player2): return self.player1 def validate(self): for pokemons in (self.player1,self.player2): if len([p for p in pokemons if len(p.actual_moves) == 0]) > 0: raise ValueError("Pokemon must have at least one move") def run(self): self.validate() while True: self.forward_step() winner = self.get_winner() if winner is not None: print(f"{winner} won the battle!") return winner if self.turn > 500: raise Exception("Battle is too long") def run1(self,hp1=None,hp2=None): self.validate() action=self.forward_step(self,hp1=hp1,hp2=hp2) winner = self.get_winner() if action is not None: reward = 0 if winner == self.player1: print("優勝はlearner") reward = 1 return reward elif winner == self.player2: print("優勝はopponent") reward = -1 return reward elif battle.turn > 500: print("ターンが長い") reward = -0.1 return reward else: print("バトル中")

melian

2024/06/13 00:57

前回の質問で挙げられていた、参考にした書籍のウェブページからサンプルコードをダウンロードして確認してみました。 AlphaZero 深層学習・強化学習・探索人工知能プログラミング実践入門 | 株式会社ボーンデジタル https://www.borndigital.co.jp/book/14383/ 質問にあるコードと比較してみると、以下の3ファイルを改造している様に見えます。 sample/8_game/8_1_connect_four/pv_mcts.py sample/8_game/8_2_reversi/pv_mcts.py sample/8_game/8_3_simple_shogi/pv_mcts.py これらのコードの構造はほぼ同じになっていて、例えば 8_1_connect_four/pv_mcts.py を実行してみますと問題なく動作します。質問にあるコードと比較してみると、predict() メソッドの内容に違いがあります。入力データ(numpy.ndarray)の shape が異なっているのは問題ないのかもしれませんが、戻り値である policies と value は、書籍のコードではそれぞれ1次元の numpy.ndarray とスカラー値です。 # 推論 def predict(model, state): 　# 推論のための入力データのシェイプの変換　a, b, c = DN_INPUT_SHAPE 　x = np.array([state.pieces, state.enemy_pieces]) 　x = x.reshape(c, a, b).transpose(1, 2, 0).reshape(1, a, b, c) 　　　　　: 　# 方策の取得　policies = y[0][0][list(state.legal_actions())] # 合法手のみ　policies /= sum(policies) if sum(policies) else 1 # 合計1の確率分布に変換　# 価値の取得　value = y[1][0][0] 　return policies, value 一方、質問のコードでは、(debug print から推測すると) policies は numpy ndarray を要素とするリストになっていて、要素数は 1 です。そのため、以下の部分で self.child_nodes に追加される Node インスタンスは1個だけになっています。書籍のコードでは policies が shape (7,) の numpy ndarray なので、7個の Node インスタンスが追加されています。 # 子ノードの展開 self.child_nodes = [] a=[6,7,8,9] for action, policy in zip(a, policies): 　battle=Battle(player1,player2) 　zyoutai=battle.forward_step(self.p1_nokorihp,self.p2_nokorihp,action) 　winner = battle.get_winner() 　self.child_nodes.append(Node(zyoutai, policy,winner)) それから、前回の質問でも指摘しましたが、書籍のコードでは predict() に渡す引数は state ではなく、self.state になっています。 # ニューラルネットワークの推論で方策と価値を取得 policies, value = predict(model, self.state)

8524ba23

2024/06/13 01:31

https://teratail.com/help/avoid-asking#question https://teratail.com/help/question-tips#questionTips11 https://teratail.com/help/question-tips#questionTips35 https://teratail.com/help/question-tips は読みましたか？読んだ結果を本文に反映ください。解決のヒントを探れるかもしれません。

miraimirai

2024/06/13 02:24

書籍では一つの配列ですが、policiesがかぎかっこ2つになってしまう原因はなぜでしょうか

miraimirai

2024/06/13 02:29

書籍との違いについての質問ですが、a[[0,1,2,3]]と指定するとエラーが発生するのはなぜでしょうか？本ではこのように指定しているので。pythonaの仕様変更でしょうか？調べましたが見つかりませんでした。

melian

2024/06/13 02:41

書籍のコードでは(合法手のみですが) y[0][0][list(state.legal_actions())] となっています。今回のコードでは合法手を考慮する必要は無いのでしょうから、policies=y[0][0] とするのかもしれませんし、value も value=y[1][0][0] とするのかもしれません。あくまでもデバッグプリントからの推測によるもので、y[0] と y[1] の shape を確認することをお勧めします。(前回の質問のコメントでも指摘しましたが) また、書籍のコードでは入力データの reshape と policies の正規化を行っています。これらの処理の意味(意義)については書籍で解説されているはずなので、そちらも確認・理解する必要があるかと思います。質問にあるコード: ================= # 推論 def predict(model, state): 　　　　: 　# 方策の取得　policies=y[0][0:4] 　# 価値の取得　value=y[1][0] ================= 書籍のコード ================= # 推論 def predict(model, state): 　　　　: 　# 方策の取得　policies = y[0][0][list(state.legal_actions())] # 合法手のみ　policies /= sum(policies) if sum(policies) else 1 # 合計1の確率分布に変換　# 価値の取得　value = y[1][0][0] =================

miraimirai

2024/06/13 03:03

list(state.legal_actions())について state.legal_actions()は合法手の配列を出力します。ですので policies = y[0][0][[0,1,2,3,5]] と等価です。ですがエラーが出ます。仕様変更があったのでしょうか？

melian

2024/06/13 03:12

> policies = y[0][0][[0,1,2,3,5]] と等価です。ですがエラーが出ます。それはおそらく、y[0][0] の shape が (4,) だからでしょう。(index 5 の要素はない) ですが、state.legal_actions() が範囲外のインデックスを返しているということは、legal_actions() メソッドの処理内容に誤りがあるということになります。

miraimirai

2024/06/13 03:19 編集

別の質問です。ループが止まりません。 while True: の部分が止まりません。 winner = battle.get_winner() が機能していないと思っています。なぜ機能しないのかがわかりません。エラー内容 WARNING:tensorflow:No training configuration found in the save file, so the model was *not* compiled. Compile it manually. 1/1 [==============================] - 0s 106ms/step policies [1.0365372e-36, 1.4505721e-18, 1.0, 1.6264764e-22] value [1.] battle実行されました 0 3 モンテカルロ 6 c1 (Jolteon(271), BodySlam, [Jolteon(271)]) サンダースが技のしかかりをつかった! こうかはいまひとつ... 0.5 サイドンが19をうけた ❤️ サイドン残りHP 332 サイドン技のしかかりを使った! サンダースが135を受けた ⭐️ サンダース残りHP 136 battle実行されました 1 1 モンテカルロ 7 c1 (Jolteon(136), DoubleKick, [Jolteon(136)]) サンダースが技にどげりをつかった! こうかはばつぐんだ！ 2 サイドンが60をうけた ❤️ サイドン残りHP 272 サイドン技いわなだれを使った! サンダースが136を受けた ⭐️ サンダース残りHP 0 battle実行されました 2 2 モンテカルロ 8 c1 (Jolteon(0), PinMissle, [Jolteon(0)]) サンダースが技ミサイルばりをつかった! サイドンが20をうけた ❤️ サイドン残りHP 252 サイドン技なみのりを使った! サンダースが0を受けた ⭐️ サンダース残りHP 0 battle実行されました 3 1 モンテカルロ 9 c1 (Jolteon(0), Thunderbolt, [Jolteon(0)]) サンダースが技10万ボルトをつかった! こうかはいまひとつ... 0 サイドンが0をうけた ❤️ サイドン残りHP 252 サイドン技いわなだれを使った! 急所に当たりました！こうかはばつぐんだ！ 1.5 サンダースが0を受けた ⭐️ サンダース残りHP 0 前 child_nodes 4 child_node.p 1.0365372e-36 child_node.p 1.4505721e-18 child_node.p 1.0 child_node.p 1.6264764e-22 pucb_values [0.0, 0.0, 0.0, 0.0] argmax 0 turn 4 len(pucb_values) 4 index <class 'numpy.int64'> index 0 len(self.child_nodes) 4 self.child_nodes [<__main__.pv_mcts_scores.<locals>.Node object at 0x303043a50>, <__main__.pv_mcts_scores.<locals>.Node object at 0x303650c10>, <__main__.pv_mcts_scores.<locals>.Node object at 0x30369fe50>, <__main__.pv_mcts_scores.<locals>.Node object at 0x3036d2110>] 1/1 [==============================] - 0s 8ms/step policies [4.088272e-31, 1.0, 1.9188026e-18, 1.2690159e-25] value [1.] battle実行されました 0 2 モンテカルロ 6 c1 (Jolteon(0), BodySlam, [Jolteon(0)]) サンダースが技のしかかりをつかった! こうかはいまひとつ... 0.5 サイドンが21をうけた ❤️ サイドン残りHP 231 サイドン技なみのりを使った! サンダースが0を受けた ⭐️ サンダース残りHP 0 battle実行されました 1 1 モンテカルロ 7 c1 (Jolteon(0), DoubleKick, [Jolteon(0)]) サンダースが技にどげりをつかった! こうかはばつぐんだ！ 2 サイドンが62をうけた ❤️ サイドン残りHP 169 サイドン技いわなだれを使った! サンダースが0を受けた ⭐️ サンダース残りHP 0 battle実行されました 2 2 モンテカルロ 8 c1 (Jolteon(0), PinMissle, [Jolteon(0)]) サンダースが技ミサイルばりをつかった! サイドンが20をうけた ❤️ サイドン残りHP 149 サイドン技なみのりを使った! サンダースが0を受けた ⭐️ サンダース残りHP 0 battle実行されました 3 1 モンテカルロ 9 c1 (Jolteon(0), Thunderbolt, [Jolteon(0)]) サンダースが技10万ボルトをつかった! こうかはいまひとつ... 0 サイドンが0をうけた ❤️ サイドン残りHP 149 サイドン技いわなだれを使った! 技は外れました！サンダースが0を受けた ⭐️ サンダース残りHP 0 前 child_nodes 4 child_node.p 1.0365372e-36 child_node.p 1.4505721e-18 child_node.p 1.0 child_node.p 1.6264764e-22 pucb_values [array([-1.], dtype=float32), 1.4505721497113927e-18, 1.0, 1.6264764436105582e-22] --------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[8], line 48 46 c2hp=player1[0].actual_hp 47 result=((c1,-1,-1,c1hp),(c2,-1,-1,c2hp)) ---> 48 next_action=action1(result) 49 winner = battle.get_winner() 50 #ゲーム終了時 Cell In[5], line 120, in pv_mcts_action.<locals>.pv_mcts_action(state) 119 def pv_mcts_action(state): --> 120 scores = pv_mcts_scores(model, state, temperature,winner) 121 rng=np.random.default_rng() 122 return rng.choice([0,1,2,3], p=scores) Cell In[5], line 105, in pv_mcts_scores(model, state, temperature, winner) 103 # 複数回の評価の実行 104 for _ in range(PV_EVALUATE_COUNT): --> 105 root_node.evaluate() 107 # 合法手の確率分布 108 scores = nodes_to_scores(root_node.child_nodes) Cell In[5], line 67, in pv_mcts_scores.<locals>.Node.evaluate(self) 62 return value 64 # 子ノードが存在する時 65 else: 66 # アーク評価値が最大の子ノードの評価で価値を取得 ---> 67 value = self.next_child_node().evaluate() 69 # 累計価値と試行回数の更新 70 self.w += value Cell In[5], line 89, in pv_mcts_scores.<locals>.Node.next_child_node(self) 87 # アーク評価値が最大の子ノードを返す 88 print("pucb_values",pucb_values) ---> 89 print("argmax",np.argmax(pucb_values)) 90 print("turn",self.turn) 91 print("len(pucb_values)",len(pucb_values)) File ~/.pyenv/versions/anaconda3-2023.09-0/lib/python3.11/site-packages/numpy/core/fromnumeric.py:1229, in argmax(a, axis, out, keepdims) 1142 """ 1143 Returns the indices of the maximum values along an axis. 1144 (...) 1226 (2, 1, 4) 1227 """ 1228 kwds = {'keepdims': keepdims} if keepdims is not np._NoValue else {} -> 1229 return _wrapfunc(a, 'argmax', axis=axis, out=out, **kwds) File ~/.pyenv/versions/anaconda3-2023.09-0/lib/python3.11/site-packages/numpy/core/fromnumeric.py:56, in _wrapfunc(obj, method, *args, **kwds) 54 bound = getattr(obj, method, None) 55 if bound is None: ---> 56 return _wrapit(obj, method, *args, **kwds) 58 try: 59 return bound(*args, **kwds) File ~/.pyenv/versions/anaconda3-2023.09-0/lib/python3.11/site-packages/numpy/core/fromnumeric.py:45, in _wrapit(obj, method, *args, **kwds) 43 except AttributeError: 44 wrap = None ---> 45 result = getattr(asarray(obj), method)(*args, **kwds) 46 if wrap: 47 if not isinstance(result, mu.ndarray): ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (4,) + inhomogeneous part.

miraimirai

2024/06/13 03:16

例えば、 a=[0,1,2,3] print(a[[0,1,2]]) を実行するとエラーが出ます。 --------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[9], line 2 1 a=[0,1,2,3] ----> 2 print(a[[0,1,2]]) TypeError: list indices must be integers or slices, not list

melian

2024/06/13 03:22

> TypeError: list indices must be integers or slices, not list a が Python のリストだからです。numpy.ndarray であればリストによる indexing が可能です。 a = np.array([0,1,2,3]) print(a[[0,1,2]])

miraimirai

2024/06/13 03:27 編集

リスト、list index out of rangeの回答ありがとうございます。特にリストは前々からなぜなのかわからなかったので助かりました。

melian

2024/06/13 03:50

> 別の質問です。ループが止まりません。現状のコードがどの様になっているのか判りませんし、"list index out of range" エラーを修正することができたのかどうかも不明なので新規に質問を立てる方がよいでしょう。まずは、書籍のサンプルコードを利用して書籍の解説内容を充分に理解することが必要かと思います。このままでは時間と労力を際限なく浪費し続けることになってしまうでしょうから。

miraimirai

2024/06/13 03:51 編集

ありがとうございます。新規に立ててみます