
Question edit history

2

Added images and code.

2019/08/17 12:50

Posted

Deactivated user
title CHANGED
File without changes
body CHANGED
@@ -106,4 +106,127 @@
  if tau >= 0:
      reward = self.discount_rewards([r for r in h_r[tau+1:tau+n]])
      self._memorize(h_s[tau], self.history[tau], reward, next_state,h_p[tau], done, h_i[tau])
+ ```
+
+ ![image description](8cdc0c9ccdf06fc79cf6c30ca74cc6a7.jpeg)
+ ![image description](dcf8a255d415502e6ab4ba6bc0844e31.jpeg)
+ ![image description](6cb04c66bde14ec8ecbd8feb1ed7be66.jpeg)
+
+ Here is the additional code.
+ ```python
+ def __init__(self, path, window_size, skip, save=False, saver_path=None, restore=False,sess=None, noise=True,norm=True):
+     self.path = path
+     self.window_size = window_size
+     self._preproc()
+     self.state_size = (None, self.window_size, self.df.shape[-1])
+     self.skip = skip
+     self.reward = reward
+     self.memory = Memory(self.MEMORY_SIZE)
+     self.mem = Memory
+     #
+     self.ent_coef = ent_coef
+     self.target_entropy = target_entropy
+     self.sess = sess
+     self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE,noise,norm)
+     self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LEARNING_RATE,noise,norm)
+     self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE, self.LEARNING_RATE,noise,norm)
+     self.icm = ICM(self.state_size, self.OUTPUT_SIZE, self.LEARNING_RATE)
+
+     with tf.variable_scope("loss"):
+         if self.target_entropy == 'auto':
+             self.target_entropy = -np.prod(self.OUTPUT_SIZE).astype(np.float32)
+         self.target_entropy = float(self.target_entropy)
+
+         entropy = tf.reduce_mean(self.actor.entropy)
+
+         if isinstance(self.ent_coef, str) and self.ent_coef.startswith('auto'):
+             init_value = 1.0
+             if '_' in self.ent_coef:
+                 init_value = float(self.ent_coef.split('_')[1])
+                 assert init_value > 0., "The initial value of ent_coef must be greater than 0"
+
+             self.log_ent_coef = tf.get_variable('log_ent_coef', dtype=tf.float32,initializer=np.log(init_value).astype(np.float32))
+             self.ent_coef = tf.exp(self.log_ent_coef)
+         else:
+             self.ent_coef = float(self.ent_coef)
+
+         min_qf = tf.minimum(self.critic.qf1, self.critic.qf2)
+         ent_coef_loss, entropy_optimizer = None, None
+         if not isinstance(self.ent_coef, float):
+             ent_coef_loss = -tf.reduce_mean(
+                 self.log_ent_coef * tf.stop_gradient(self.actor.log_pi + self.target_entropy))
+
+         policy_kl_loss = tf.reduce_mean(self.ent_coef * self.actor.log_pi - self.critic.qf1)
+         policy_loss = policy_kl_loss
+         v_backup = tf.stop_gradient(min_qf - self.ent_coef * self.actor.log_pi)
+         value_loss = 0.5 * tf.reduce_mean((self.critic.value_fn - v_backup) ** 2)
+         value_loss += self.critic.td_loss
+         self.policy_loss = policy_loss
+         self.actor_optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE,name="actor_optimizer").minimize(policy_loss,var_list=get_vars("actor-original"))
+         self.vf_optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE,name="actor_optimizer").minimize(value_loss,var_list=get_vars("critic-original"))
+         self.entropy_optimizer = tf.train.AdamOptimizer(learning_rate=self.LEARNING_RATE,name="entropy_optimizer").minimize(ent_coef_loss,var_list=self.log_ent_coef)
+
+     self.save = save
+     self.saver = tf.train.Saver(tf.global_variables())
+     self.actor_saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "actor-original"))
+     self.saver_path = saver_path
+
+     if restore==True:
+         self.saver.restore(self.sess,self.saver_path)
+         self.actor_saver.restore(self.sess,"actor_sac")
+     else:
+         self.sess.run(tf.global_variables_initializer())
+ ```
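The `get_vars` helper passed to the optimizers' `var_list` arguments is not defined in the posted snippet; a minimal sketch, assuming it simply collects the trainable variables created under a given scope:

```python
import tensorflow as tf

def get_vars(scope):
    # Assumed behaviour (the real helper is not shown in the question):
    # return the trainable variables whose names fall under `scope`,
    # e.g. "actor-original" or "critic-original".
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
```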
+
+ Preparing the environment
+ ```python
+ def _preproc(self):
+     df = pd.read_csv(self.path)
+     X = df[["Close"]]
+     X = MinMaxScaler().fit_transform(X)
+     # X = np.asanyarray(X)
+
+     gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(X, np.asanyarray(df[["Open"]])[-len(X)::], self.window_size)
+     x = []
+     y = []
+     for i in gen:
+         x.extend(i[0].tolist())
+     x = np.asanyarray(x)
+
+     self.df = x[-self.STEP_SIZE::]
+     self.trend = np.asanyarray(df[["Open"]])[-len(self.df)::]
+ ```
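For reference, `TimeseriesGenerator` yields `(inputs, targets)` batches in which each input sample is a sliding window of `window_size` consecutive rows, which is why `_preproc` collects only `i[0]`. A small standalone example:

```python
import numpy as np
import tensorflow as tf

data = np.arange(10, dtype=np.float32).reshape(-1, 1)     # 10 time steps, 1 feature
targets = np.arange(10, dtype=np.float32).reshape(-1, 1)  # dummy targets
gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(data, targets, length=3)

x_batch, y_batch = gen[0]
print(x_batch.shape)  # (7, 3, 1): 7 windows of 3 consecutive steps each
print(y_batch.shape)  # (7, 1): the value that follows each window
```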
+
+ Exploration
+ ```python
+ def _select_action(self, state,next_state=None):
+     prediction, self.init_value = self.sess.run([self.actor.logits,self.actor.last_state],
+                                                 feed_dict={self.actor.X:[state],self.actor.initial_state:self.init_value})
+     self.pred = prediction
+     action = exploration(prediction[0], self.OUTPUT_SIZE,self.EPSILON)
+     if next_state is not None:
+         self.ri = self.sess.run(self.icm.ri,
+                                 feed_dict={self.icm.state:[state],self.icm.next_state:[next_state],self.icm.action:action.reshape((-1,1))})
+     return action
+ ```
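`exploration` is also not included in the question; a minimal epsilon-greedy sketch, assuming it either samples a random action or takes the argmax of the predicted logits (hypothetical implementation):

```python
import numpy as np

def exploration(logits, output_size, epsilon):
    # Hypothetical helper: with probability `epsilon` pick a random action,
    # otherwise act greedily on the predicted logits.
    if np.random.rand() < epsilon:
        return np.array([np.random.randint(output_size)])
    return np.array([np.argmax(logits)])
```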
+ Evaluating the agent
+ ```python
+ def buy(self, spread, pip_cost, sl):
+     position = 3
+     pip = []
+     states = []
+     spread = spread / pip_cost
+     loscut = False
+     self.init_value = np.zeros((1, 512))
+     for t in range(0, len(self.trend), self.skip):
+         state = self.get_state(t)
+         action = self._select_action(state)
+
+         states,pip,position = self.reward(self.trend,t,pip,action,position,states,pip_cost,spread)
+
+         if len(pip) != 0:
+             self.pip = np.asanyarray(pip)
+     total_pip = np.sum(self.pip)
+     # state = next_state
+     return total_pip, self.pip
  ```
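`get_state`, used in `buy`, is not shown either; a minimal sketch, assuming it returns the preprocessed window at index `t` from the `self.df` array built in `_preproc`:

```python
def get_state(self, t):
    # Assumed behaviour: return the t-th preprocessed window,
    # shaped (window_size, n_features), matching self.state_size.
    return self.df[t]
```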

1

Changed the reward function.

2019/08/17 12:50

Posted

Deactivated user
title CHANGED
File without changes
body CHANGED
@@ -7,51 +7,37 @@
  ```python
  import numpy as np

- def reward(trend,t,pip,action,position,states,pip_cost,spread):
+ def reward2(double trend,list pip,int action,int position,list states,double pip_cost,double spread):
+     cdef list r = []
+     cdef int sub = 0
+     if position != 3:
-     if action == 0:
+         if action == 0:
+             p = [(trend - s) * pip_cost for s in states]
+         else:
+             p = [(s - trend) * pip_cost for s in states]
+             spread *= -1
+
-         if position == 3:
+         if action == position:
-             states = [trend[t] + spread]
-             position = 1
-         elif position == 1:
-             sub = 0
+             sub = 0
-             p = [(trend[t] - s) * pip_cost for s in states]
-             for b in range(0,len(p)):
+             for b in range(0, len(p)):
-                 r = [np.asanyarray([-40.0]), True] if p[b] <= -40 else [p[b], False]
+                 r = [-40.0, True] if p[b] <= -40 else [p[b], False]
-                 if r[1]:
+                 if r[1]:
-                     pip.extend(r[0].tolist())
+                     pip.append(r[0])
-                     states.pop(b-sub)
+                     states.pop(b - sub)
-                     sub += 1
+                     sub += 1
-             states.append(trend[t] + spread)
+             states.append(trend+ spread)
-             position = 1
+             position = action
-         elif position == 2:
-             p = [(s - trend[t]) * pip_cost for s in states]
-             for b in p:
-                 b = np.asanyarray([-40.0]) if b <= -40 else b
-                 pip.extend(b.tolist())
-             states = [trend[t] + spread]
-             position = 1
-     elif action == 1:
-         if position == 3:
-             states = [trend[t] - spread]
-             position = 2
-         elif position == 2:
-             sub = 0
-             p = [(s - trend[t]) * pip_cost for s in states]
-             for b in range(0,len(p)):
-                 r = [np.asanyarray([-40.0]), True] if p[b] <= -40 else [p[b], False]
-                 if r[1]:
+         else:
-                     pip.extend(r[0].tolist())
-                     states.pop(b-sub)
-                     sub += 1
-             states.append(trend[t] - spread)
-             position = 2
-         elif position == 1:
-             p = [(trend[t] - s) * pip_cost for s in states]
              for b in p:
-                 b = np.asanyarray([-40.0]) if b <= -40 else b
+                 b = -40.0 if b <= -40 else b
-                 pip.extend(b.tolist())
+                 pip.append(b)
-             states = [trend[t] - spread]
+             states = [trend+ spread]
-             position = 2
+             position = action
+
+     else:
+         states = [trend + spread] if action == 0 else [trend - spread]
+         position = action
+
      return states,pip,position
  ```
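Note that `reward2` uses Cython syntax (`cdef` and typed arguments), so it has to be compiled before it can be imported from Python; a minimal sketch, assuming the function is saved in a hypothetical `reward_cy.pyx` file and built with `pyximport`:

```python
import numpy as np
import pyximport
pyximport.install()            # compile .pyx modules on import

from reward_cy import reward2  # hypothetical module/file name (reward_cy.pyx)

# One evaluation step with dummy values: the new signature takes a single
# price (double trend) instead of the whole trend array plus an index t.
trend = np.array([[1.1010], [1.1025], [1.1040]])
states, pip, position = reward2(float(trend[1, 0]), [], 0, 3, [], 100.0, 0.0003)
```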
  Deciding the action