質問編集履歴
2
test
CHANGED
File without changes
|
test
CHANGED
File without changes
|
1
コード追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -20,6 +20,34 @@
|
|
20
20
|
|
21
21
|
```python
|
22
22
|
|
23
|
+
class Actor:
    """Epsilon-greedy action selector driven by a target Q-network."""

    def get_action(self, state, episode, targetQN):
        """Return the action to take at step t+1.

        Args:
            state: Observation passed unchanged to ``targetQN.forward``.
            episode: Index of the current episode; larger values shrink
                the exploration rate.
            targetQN: Object exposing ``forward(state)``. Element ``[0]``
                of its output is used as the per-action Q-values
                (presumably the single row of a batch of one; confirm
                against the network definition).

        Returns:
            Index of the chosen action: the argmax of the Q-values when
            exploiting, or a uniform draw from ``[0, 1]`` when exploring.
        """
        # Epsilon-greedy: explore a lot early on, then converge towards
        # the greedy action (epsilon tends to 0.001 as episode grows).
        epsilon = 0.001 + 0.9 / (1.0 + episode)

        if epsilon <= np.random.uniform(0, 1):
            retTargetQs = targetQN.forward(state)[0]
            # NumPy cannot convert a tensor that is attached to the autograd
            # graph or stored on the GPU, so bring tensor-like outputs back
            # to a plain CPU array first. Non-tensor outputs pass through.
            if hasattr(retTargetQs, "detach"):
                retTargetQs = retTargetQs.detach().cpu().numpy()
            # Pick the action with the highest estimated value.
            action = np.argmax(retTargetQs)
        else:
            # Act randomly (this environment is assumed to have two actions).
            action = np.random.choice([0, 1])

        return action
|
48
|
+
|
49
|
+
|
50
|
+
|
23
51
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
24
52
|
|
25
53
|
mainQN = Network().to(device)
|