質問編集履歴

2020/05/26 12:22

投稿

スコア123

title CHANGED Viewed

File without changes

body CHANGED Viewed

File without changes

コード追加

2020/05/26 12:22

投稿

スコア123

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -9,6 +9,20 @@
 ```python
+class Actor:
+    def get_action(self, state, episode, targetQN):  # [C] return the action to take at t+1
+        # epsilon-greedy: epsilon shrinks as the episode count grows, so the greedy action is chosen more and more often
+        epsilon = 0.001 + 0.9 / (1.0 + episode)
+        if epsilon <= np.random.uniform(0, 1):
+            retTargetQs = targetQN.forward(state)[0]
+            action = np.argmax(retTargetQs)  # choose the action with the largest value in the network output
+        else:
+            action = np.random.choice([0, 1])  # act randomly
+        return action
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 mainQN = Network().to(device)
 optimizer = optim.Adam(mainQN.parameters(), lr=learning_rate)