Question edit history

2
Added an image and code.

title: no changes
body: changed

@@ -106,4 +106,127 @@

```python
if tau >= 0:
    reward = self.discount_rewards([r for r in h_r[tau+1:tau+n]])
    self._memorize(h_s[tau], self.history[tau], reward, next_state, h_p[tau], done, h_i[tau])
```
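The `discount_rewards` method called above is not included in this excerpt. As a rough sketch only, an n-step discounted-return helper of that name might look like the following; the discount factor `gamma` and the scalar aggregation are assumptions, not the poster's implementation.

```python
import numpy as np

# Hypothetical helper: sum the next n rewards with exponential discounting.
# The actual discount_rewards used in the question is not shown.
def discount_rewards(rewards, gamma=0.99):
    rewards = np.asarray(rewards, dtype=np.float32)
    discounts = gamma ** np.arange(len(rewards))
    return float(np.sum(discounts * rewards))
```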

Adding the code below.

```python
# Agent constructor (excerpt). Class-level constants such as MEMORY_SIZE,
# LEARNING_RATE, OUTPUT_SIZE, and the Actor/Critic/ICM/Memory helper classes
# are defined elsewhere in the full code.
# Note: reward, ent_coef, and target_entropy are referenced in the body,
# so they are added to the signature here (defaults assumed).
def __init__(self, path, window_size, skip, save=False, saver_path=None, restore=False,
             sess=None, noise=True, norm=True, reward=None, ent_coef='auto', target_entropy='auto'):
    self.path = path
    self.window_size = window_size
    self._preproc()
    self.state_size = (None, self.window_size, self.df.shape[-1])
    self.skip = skip
    self.reward = reward
    self.memory = Memory(self.MEMORY_SIZE)
    self.mem = Memory
    # Entropy-coefficient settings for SAC
    self.ent_coef = ent_coef
    self.target_entropy = target_entropy
    self.sess = sess
    self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE, noise, norm)
    self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LEARNING_RATE, noise, norm)
    self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE, self.LEARNING_RATE, noise, norm)
    self.icm = ICM(self.state_size, self.OUTPUT_SIZE, self.LEARNING_RATE)

    with tf.variable_scope("loss"):
        if self.target_entropy == 'auto':
            self.target_entropy = -np.prod(self.OUTPUT_SIZE).astype(np.float32)
        self.target_entropy = float(self.target_entropy)

        entropy = tf.reduce_mean(self.actor.entropy)

        if isinstance(self.ent_coef, str) and self.ent_coef.startswith('auto'):
            init_value = 1.0
            if '_' in self.ent_coef:
                init_value = float(self.ent_coef.split('_')[1])
                assert init_value > 0., "The initial value of ent_coef must be greater than 0"

            self.log_ent_coef = tf.get_variable('log_ent_coef', dtype=tf.float32,
                                                initializer=np.log(init_value).astype(np.float32))
            self.ent_coef = tf.exp(self.log_ent_coef)
        else:
            self.ent_coef = float(self.ent_coef)

        min_qf = tf.minimum(self.critic.qf1, self.critic.qf2)
        ent_coef_loss, entropy_optimizer = None, None
        if not isinstance(self.ent_coef, float):
            ent_coef_loss = -tf.reduce_mean(
                self.log_ent_coef * tf.stop_gradient(self.actor.log_pi + self.target_entropy))

        policy_kl_loss = tf.reduce_mean(self.ent_coef * self.actor.log_pi - self.critic.qf1)
        policy_loss = policy_kl_loss
        v_backup = tf.stop_gradient(min_qf - self.ent_coef * self.actor.log_pi)
        value_loss = 0.5 * tf.reduce_mean((self.critic.value_fn - v_backup) ** 2)
        value_loss += self.critic.td_loss
        self.policy_loss = policy_loss
        self.actor_optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE, name="actor_optimizer").minimize(
            policy_loss, var_list=get_vars("actor-original"))
        self.vf_optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE, name="vf_optimizer").minimize(
            value_loss, var_list=get_vars("critic-original"))
        # Only build the entropy optimizer when ent_coef is being auto-tuned,
        # otherwise ent_coef_loss is None.
        if ent_coef_loss is not None:
            self.entropy_optimizer = tf.train.AdamOptimizer(
                learning_rate=self.LEARNING_RATE, name="entropy_optimizer").minimize(
                ent_coef_loss, var_list=self.log_ent_coef)

    self.save = save
    self.saver = tf.train.Saver(tf.global_variables())
    self.actor_saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "actor-original"))
    self.saver_path = saver_path

    if restore == True:
        self.saver.restore(self.sess, self.saver_path)
        self.actor_saver.restore(self.sess, "actor_sac")
    else:
        self.sess.run(tf.global_variables_initializer())
```
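As a usage sketch only: assuming this constructor belongs to a class called `Agent` (the class name, the CSV path, and the hyperparameter values below are placeholders, not taken from the question), it might be driven like this, with `agent.buy(...)` then used for evaluation as shown further down.

```python
import tensorflow as tf

# Hypothetical driver; "Agent" and every argument value are assumptions.
tf.reset_default_graph()
sess = tf.Session()
agent = Agent(path="prices.csv", window_size=10, skip=1,
              sess=sess, noise=True, norm=True)  # builds the SAC graph
```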

Environment setup

```python
def _preproc(self):
    df = pd.read_csv(self.path)
    X = df[["Close"]]
    X = MinMaxScaler().fit_transform(X)
    # X = np.asanyarray(X)

    gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(X, np.asanyarray(df[["Open"]])[-len(X)::], self.window_size)
    x = []
    y = []
    for i in gen:
        x.extend(i[0].tolist())
    x = np.asanyarray(x)

    self.df = x[-self.STEP_SIZE::]
    self.trend = np.asanyarray(df[["Open"]])[-len(self.df)::]
```
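To illustrate the windowing step in isolation, here is a small self-contained example of `tf.keras.preprocessing.sequence.TimeseriesGenerator` with toy data; the array values are made up and are not the poster's data.

```python
import numpy as np
import tensorflow as tf

# Toy illustration of the sliding windows built in _preproc (made-up values).
data = np.arange(6, dtype=np.float32).reshape(-1, 1)      # e.g. scaled Close prices
targets = np.arange(6, dtype=np.float32).reshape(-1, 1)   # e.g. Open prices
gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(data, targets, length=3)

windows, _ = gen[0]
print(windows.shape)  # (3, 3, 1): three windows of length 3 with one feature each
```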

Exploration

```python
def _select_action(self, state, next_state=None):
    prediction, self.init_value = self.sess.run([self.actor.logits, self.actor.last_state],
                                                feed_dict={self.actor.X: [state], self.actor.initial_state: self.init_value})
    self.pred = prediction
    action = exploration(prediction[0], self.OUTPUT_SIZE, self.EPSILON)
    if next_state is not None:
        self.ri = self.sess.run(self.icm.ri,
                                feed_dict={self.icm.state: [state], self.icm.next_state: [next_state], self.icm.action: action.reshape((-1, 1))})
    return action
```
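The `exploration` helper used above is not part of the excerpt. A minimal epsilon-greedy sketch of what such a function could look like follows; this is purely an assumption about its behaviour, and the poster's version may differ.

```python
import numpy as np

# Hypothetical epsilon-greedy selection over the actor's logits.
# The actual exploration() used in the question is not shown.
def exploration(logits, output_size, epsilon):
    if np.random.rand() < epsilon:
        return np.asarray(np.random.randint(output_size))  # random action
    return np.asarray(np.argmax(logits))                    # greedy action
```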

Agent evaluation

```python
def buy(self, spread, pip_cost, sl):
    position = 3
    pip = []
    states = []
    spread = spread / pip_cost
    loscut = False
    self.init_value = np.zeros((1, 512))
    for t in range(0, len(self.trend), self.skip):
        state = self.get_state(t)
        action = self._select_action(state)

        states, pip, position = self.reward(self.trend, t, pip, action, position, states, pip_cost, spread)

    if len(pip) != 0:
        self.pip = np.asanyarray(pip)
    total_pip = np.sum(self.pip)
    # state = next_state
    return total_pip, self.pip
```
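`get_state` is called in `buy()` but is also not shown in the question. A plausible sketch, assuming it simply returns the t-th precomputed window built by `_preproc` (an assumption, not the actual method):

```python
import numpy as np

# Hypothetical sketch of get_state: return the t-th window from self.df,
# which _preproc fills with arrays of shape (window_size, n_features).
def get_state(self, t):
    state = np.asanyarray(self.df[t])
    return state  # shape (window_size, n_features), matching self.state_size[1:]
```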

1
Changed the reward function.

title: no changes
body: changed

@@ -7,51 +7,37 @@

The previous implementation branched separately on the action (0 or 1) and on each position value (1, 2, 3), capped losses at -40 using numpy arrays (np.asanyarray([-40.0])), and indexed the price as trend[t]. It was replaced by the following reward2, which receives the current price as a scalar and folds the long/short cases into shared branches:

```python
import numpy as np

def reward2(double trend, list pip, int action, int position, list states, double pip_cost, double spread):
    cdef list r = []
    cdef int sub = 0
    if position != 3:
        if action == 0:
            p = [(trend - s) * pip_cost for s in states]
        else:
            p = [(s - trend) * pip_cost for s in states]
            spread *= -1

        if action == position:
            sub = 0
            for b in range(0, len(p)):
                r = [-40.0, True] if p[b] <= -40 else [p[b], False]
                if r[1]:
                    pip.append(r[0])
                    states.pop(b - sub)
                    sub += 1
            states.append(trend + spread)
            position = action
        else:
            for b in p:
                b = -40.0 if b <= -40 else b
                pip.append(b)
            states = [trend + spread]
            position = action

    else:
        states = [trend + spread] if action == 0 else [trend - spread]
        position = action

    return states, pip, position
```
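For a quick sanity check of the new function, here is a hypothetical call sequence; it assumes the file is compiled with Cython (or that the `cdef`/type declarations are stripped for a pure-Python test), and the prices and pip_cost are made up.

```python
# Hypothetical usage of reward2 with made-up prices.
pip_cost, spread = 10000.0, 0.0002   # spread already converted to price units, as in buy()
states, pip, position = [], [], 3    # start with no open position (position == 3)

# Open a long position (action 0) at 1.1000.
states, pip, position = reward2(1.1000, pip, 0, position, states, pip_cost, spread)
# states ~= [1.1002], pip == [], position == 0

# Hold/add to the long position while the price rises to 1.1050.
states, pip, position = reward2(1.1050, pip, 0, position, states, pip_cost, spread)
# states ~= [1.1002, 1.1052], pip == [], position == 0 (open profit is above the -40 cut)
```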

Action selection