質問編集履歴

1

追記

2022/03/10 22:34

投稿

junko_kobayashi
junko_kobayashi

スコア11

test CHANGED
File without changes
test CHANGED
@@ -70,3 +70,23 @@
70
70
 
71
71
 
72
72
 
73
+ 追記
74
+ ```Python
75
+ q_func.to_gpu(0) ## GPUを使う場合の設定(GPUを使わない人はこの行をコメントアウトする)
76
+
77
+ optimizer = chainer.optimizers.Adam(eps=1e-2)
78
+ optimizer.setup(q_func) #設計したq関数の最適化にAdamを使う
79
+ gamma = 0.95
80
+ explorer = chainerrl.explorers.ConstantEpsilonGreedy(
81
+ epsilon=0.3, random_action_func=env.action_space.sample)
82
+ replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity = 10**6)
83
+ phi = lambda x:x.astype(np.float32, copy=False)##型の変換(chainerはfloat32型。float64は駄目)
84
+
85
+ agent = chainerrl.agents.DoubleDQN(
86
+ q_func, optimizer, replay_buffer, gamma, explorer,
87
+ replay_start_size=500, update_interval=1,
88
+ target_update_interval=100, phi=phi)
89
+ ```
90
+
91
+
92
+