質問編集履歴
1
追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -70,3 +70,23 @@
|
|
70
70
|
|
71
71
|
|
72
72
|
|
73
|
+
追記
|
74
|
+
```Python
|
75
|
+
q_func.to_gpu(0) ## GPUを使う設定(GPUを使わない場合はこの行をコメントアウトする)
|
76
|
+
|
77
|
+
optimizer = chainer.optimizers.Adam(eps=1e-2)
|
78
|
+
optimizer.setup(q_func) #設計したq関数の最適化にAdamを使う
|
79
|
+
gamma = 0.95
|
80
|
+
explorer = chainerrl.explorers.ConstantEpsilonGreedy(
|
81
|
+
epsilon=0.3, random_action_func=env.action_space.sample)
|
82
|
+
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity = 10**6)
|
83
|
+
phi = lambda x:x.astype(np.float32, copy=False) ## 型の変換(Chainerはfloat32を前提とするため、float64のままでは使えない)
|
84
|
+
|
85
|
+
agent = chainerrl.agents.DoubleDQN(
|
86
|
+
q_func, optimizer, replay_buffer, gamma, explorer,
|
87
|
+
replay_start_size=500, update_interval=1,
|
88
|
+
target_update_interval=100, phi=phi)
|
89
|
+
```
|
90
|
+
|
91
|
+
|
92
|
+
|