編集履歴

質問編集履歴

1番目のコードでのポジションを少し変更しました

2019/08/18 06:28

投稿

退会済みユーザー

スコア0

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -66,15 +66,65 @@
             states = [trend + spread]
+            position = 0
+        # 2(pの計算だけ違うがそれを除けば同じ) : 損切の確認、statesに値を追加
+        elif position == 0:
+          sub = 0
+          p = [(trend - s) * pip_cost for s in states]
+          for b in range(0,len(p)):
+            r = [-40.0, True] if p[b] <= -40 else [p[b], False]
+            if r[1]:
+              pip.append(r[0])
+              states.pop(b-sub)
+              sub += 1
+            states.append(trend[t] + spread)
+            position = 0
+        # 3(pの計算が違うだけ) : 利益確定をし新しい状態を持つ
+        elif position == 1:
+          p = [(s - trend) * pip_cost for s in states]
+          for b in p:
+            b = -40.0 if b <= -40 else b
+            pip.append(b)
+          states = [trend + spread]
+          position = 0
+    elif action == 1:
+        # 1
+        if position == 3:
+            states = [trend - spread]
             position = 1
-        # 2(pの計算だけ違うがそれを除けば同じ) : 損切の確認、statesに値を追加
+        # 2
         elif position == 1:
           sub = 0
-          p = [(trend - s) * pip_cost for s in states]
+          p = [(s - trend) * pip_cost for s in states]
           for b in range(0,len(p)):
@@ -88,75 +138,119 @@
               sub += 1
-            states.append(trend[t] + spread)
+          states.append(trend - spread)
+          position = 1
+       # 3
+        elif position == 0:
+            p = [(trend - s) * pip_cost for s in states]
+            for b in p:
+              b = -40.0 if b <= -40 else b
+              pip.append(b)
+            states = [trend[t] - spread]
             position = 1
+    return states,pip,position
+```
+```python
+'''
+一番目のコードと同様に番号を振る。
-        # 3(pの計算が違うだけ) : 利益確定をし新しい状態を持つ
+速度アップを図るためにcythonを使用しています。
+'''
+def reward2(double trend,list pip,int action,int position,list states,double pip_cost,double spread):
-        elif position == 2:
+    cdef list r
-          p = [(s - trend) * pip_cost for s in states]
-          for b in p:
+    cdef list p
-            b = -40.0 if b <= -40 else b
-            pip.append(b)
-          states = [trend + spread]
-          position = 1
-    elif action == 1:
+    cdef int sub = 0
-        # 1
+    # 2と3 : アクションの値によって式が異なるが同じ処理（1番目のコードと）
-        if position == 3:
+   if position != 3:
-            states = [trend - spread]
-            position = 2
-        # 2
-        elif position == 2:
+        if action == 0:
-          sub = 0
+            #pipの計算
-          p = [(s - trend) * pip_cost for s in states]
-          for b in range(0,len(p)):
-            r = [-40.0, True] if p[b] <= -40 else [p[b], False]
-            if r[1]:
-              pip.append(r[0])
-              states.pop(b-sub)
-              sub += 1
-          states.append(trend - spread)
-          position = 2
-       # 3
-        elif position == 1:
             p = [(trend - s) * pip_cost for s in states]
+        else:
+            p = [(s - trend) * pip_cost for s in states]
+            spread *= -1
+        # positionの値をアクションと同様にしています。
+        # 2 : 前回のアクションと同様なら実行する。
+        if action == position:
+            for b in range(0, len(p)):
+                # tl = 40を超えていれば損切とみなし、-40に固定する。
+                r = True if p[b] <= -40 else False
+                # 発生している場合は該当の状態を削除する（2重評価を避けるため）
+                if r:
+                    pip.append(-40)
+                    states.pop(b - sub)
+                    sub += 1
+            states.append(trend + spread)
+            position = action
+        # 3 : 前回のアクションと異なるなら実行する
+        else:
             for b in p:
               b = -40.0 if b <= -40 else b
               pip.append(b)
-            states = [trend[t] - spread]
+            states = [trend + spread]
-            position = 2
+            position = action
+    # 1 : time stepが0の時に実行する。
+    if position == 3:
+        states = [trend + spread] if action == 0 else [trend - spread]
+        position = action
     return states,pip,position
@@ -164,274 +258,180 @@
-```python
-'''
-一番目のコードと同様に番号を振る。
-速度アップを図るためにcythonを使用しています。
-'''
-def reward2(double trend,list pip,int action,int position,list states,double pip_cost,double spread):
-    cdef list r
-    cdef list p
-    cdef int sub = 0
-    # 2と3 : アクションの値によって式が異なるが同じ処理（1番目のコードと）
-   if position != 3:
-        if action == 0:
-            #pipの計算
-            p = [(trend - s) * pip_cost for s in states]
-        else:
-            p = [(s - trend) * pip_cost for s in states]
-            spread *= -1
-        # positionの値をアクションと同様にしています。
-        # 2 : 前回のアクションと同様なら実行する。
-        if action == position:
-            for b in range(0, len(p)):
-                # tl = 40を超えていれば損切とみなし、-40に固定する。
-                r = True if p[b] <= -40 else False
-                # 発生している場合は該当の状態を削除する（2重評価を避けるため）
-                if r:
-                    pip.append(-40)
-                    states.pop(b - sub)
-                    sub += 1
-            states.append(trend + spread)
-            position = action
-        # 3 : 前回のアクションと異なるなら実行する
-        else:
-            for b in p:
-              b = -40.0 if b <= -40 else b
-              pip.append(b)
-            states = [trend + spread]
-            position = action
-    # 1 : time stepが0の時に実行する。
-    if position == 3:
-        states = [trend + spread] if action == 0 else [trend - spread]
-        position = action
-    return states,pip,position
 ```
+'''
+コードの比較
+１番目のコードの１
+if position == 3: # action == 0
+    states = [trend + spread]
+    position = 1
+if position == 3: # action == 1
+    states = [trend - spread]
+    position = 2
+2番目のコードの１
+elif position == 3:
+    states = [trend + spread] if action == 0 else [trend - spread] # action == 0 states = [trend + spread], action == 1 states = [trend - spread]
+    position = action
+1番目のコードの２
+elif position == 1: # action == 0
+    sub = 0
+    p = [(trend - s) * pip_cost for s in states]
+    for b in range(0,len(p)):
+    r = True if p[b] <= -40 else False
+    if r:
+        pip.append(-40)
+        states.pop(b-sub)
+        sub += 1
+    states.append(trend + spread)
+    position = 1
+elif position == 2: # action == 1
+  sub = 0
+  p = [(s - trend) * pip_cost for s in states]
+  for b in range(0,len(p)):
+    r = True if p[b] <= -40 else False
+    if r:
+      pip.append(-40)
+      states.pop(b-sub)
+      sub += 1
+  states.append(trend - spread)
+  position = 2
+2番目にコードの２と３
+if position != 3:
+    if action == 0:
+        p = [(trend - s) * pip_cost for s in states]
+    else:
+        p = [(s - trend) * pip_cost for s in states]
+        spread *= -1
+2番目のコードの2
+if action == position:
+    sub = 0
+    for b in range(0,len(p)):
+        r = True if p[b] <= -40 else False
+        if r:
+            pip.append(-40)
+            states.pop(b - sub)
+            sub += 1
+１番目のコードの３
+elif position == 2: # action == 0
+  p = [(s - trend) * pip_cost for s in states]
+  for b in p:
+    b = -40.0 if b <= -40 else b
+    pip.append(b)
+  states = [trend + spread]
+  states = [trend + spread]
+  position = 1
+elif position == 1: action == 1
+    p = [(trend - s) * pip_cost for s in states]
+    for b in p:
+        b = -40.0 if b <= -40 else b
+        pip.append(b)
+    states = [trend + spread]
+    position = 2
+２番目の３
+else:
+    for b in p:
+        b = -40.0 if b <= -40 else b
+        pip.append(b)
+    states = [trend + spread]
+    position = action
+'''
 ```
-'''
-コードの比較
-１番目のコードの１
-if position == 3: # action == 0
-    states = [trend + spread]
-    position = 1
-if position == 3: # action == 1
-    states = [trend - spread]
-    position = 2
-2番目のコードの１
-elif position == 3:
-    states = [trend + spread] if action == 0 else [trend - spread] # action == 0 states = [trend + spread], action == 1 states = [trend - spread]
-    position = action
-1番目のコードの２
-elif position == 1: # action == 0
-    sub = 0
-    p = [(trend - s) * pip_cost for s in states]
-    for b in range(0,len(p)):
-    r = True if p[b] <= -40 else False
-    if r:
-        pip.append(-40)
-        states.pop(b-sub)
-        sub += 1
-    states.append(trend + spread)
-    position = 1
-elif position == 2: # action == 1
-  sub = 0
-  p = [(s - trend) * pip_cost for s in states]
-  for b in range(0,len(p)):
-    r = True if p[b] <= -40 else False
-    if r:
-      pip.append(-40)
-      states.pop(b-sub)
-      sub += 1
-  states.append(trend - spread)
-  position = 2
-2番目にコードの２と３
-if position != 3:
-    if action == 0:
-        p = [(trend - s) * pip_cost for s in states]
-    else:
-        p = [(s - trend) * pip_cost for s in states]
-        spread *= -1
-2番目のコードの2
-if action == position:
-    sub = 0
-    for b in range(0,len(p)):
-        r = True if p[b] <= -40 else False
-        if r:
-            pip.append(-40)
-            states.pop(b - sub)
-            sub += 1
-１番目のコードの３
-elif position == 2: # action == 0
-  p = [(s - trend) * pip_cost for s in states]
-  for b in p:
-    b = -40.0 if b <= -40 else b
-    pip.append(b)
-  states = [trend + spread]
-  states = [trend + spread]
-  position = 1
-elif position == 1: action == 1
-    p = [(trend - s) * pip_cost for s in states]
-    for b in p:
-        b = -40.0 if b <= -40 else b
-        pip.append(b)
-    states = [trend + spread]
-    position = 2
-２番目の３
-else:
-    for b in p:
-        b = -40.0 if b <= -40 else b
-        pip.append(b)
-    states = [trend + spread]
-    position = action
-'''
-```
 ##画像

変数の説明を追加しました

2019/08/18 06:28

投稿

退会済みユーザー

スコア0

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -18,6 +18,34 @@
 そして価格が上がることが期待されるので利益確定は、現時点での価格を10とし計算式は 10-statesになります。
+##変数の説明
+外国為替ではPIPで報酬を決定します。
+trend：報酬の決定に関する価格（その時間での始値になる）
+pip:発生した報酬をためていくリスト
+action:その時間での、売るか買うかのどちらかの行動
+position:前回のアクション
+states:購入または売った価格が含まれたリスト
+pip_cost:例えばUSDJPYでの1PIPは、10,000* .0001 = 1.のようになります。わかりにくいと思いますが、.0001のほうがtrend-statesで、それを1PIPに変換するための数字です。
+spread:購入または売りに関する手数料。例えば購入なら価格が上がることが期待されるのでスプレッドを足すことになる。
+番号2と番号3では変数rの処理が違います。番号2では損切（損が一定以上になった場合、その時点で利益確定をする）が発生しているかどうかを判定し、番号3でも同様の判定をしますが、損益の数値を直接計算します。
 ```python
 '''
@@ -162,6 +190,8 @@
         if action == 0:
+            #pipの計算
             p = [(trend - s) * pip_cost for s in states]
         else:
@@ -413,3 +443,7 @@
 2番目のコードの結果（何回繰り返しても最初は必ずプラスになる。）
 ![イメージ説明](0a367287e4d7542fc19162515da2e244.jpeg)
+分からないことがあれば、できるだけ情報を追加したいと思います。

質問内容を変更しました。

2019/08/18 06:18

投稿

退会済みユーザー

スコア0

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -1,24 +1,52 @@
+### 達成したいこと・問題点
-下記の2つのコードなのですが、結果は変わらないはずなのですが違いが出てきます。原因がよく理解できません。説明は2番目のコード内に書いています。
+1番目のコードと2番目のコードは流れ自体は変わりません。ただ一番目のコードは見にくく改善しようと2番目のコードに直しました。しかし、結果は同じになるはずなのですが違いが出てきてしまいます。
+### コードの説明
+コード全体の趣旨としては、アクションは0か1のみになり、前回と同様のアクションが選択されれば追加の購入（スプレッドを足す）または売り（スプレッドを引く）をstatesに追加します。
+また、損切が発生している場合は、ロスカットをピップに追加し該当のstatesを削除します。
+前回とアクションが異なれば利益確定をし、新しいstatesを定義します。
+例えば価格が1の時購入しスプレッドが1だと仮定すれば, states = 1+1になります。
+tlが4だとしても途中評価がなければ不当な利益確定になってしまいます。
+そして価格が上がることが期待されるので利益確定は、現時点での価格を10とし計算式は 10-statesになります。
 ```python
+'''
+重複している部分に同じ番号を振る
+以下のように多少の計算式が異なるだけでほぼ同じ処理をしている。
+'''
-def reward(trend,t,pip,action,position,states,pip_cost,spread):
+def reward(trend,pip,action,position,states,pip_cost,spread):
     if action == 0:
+        # 1 : time stepが0の時に実行
         if position == 3:
-            states = [trend[t] + spread]
+            states = [trend + spread]
             position = 1
+        # 2(pの計算だけ違うがそれを除けば同じ) : 損切の確認、statesに値を追加
         elif position == 1:
           sub = 0
-          p = [(trend[t] - s) * pip_cost for s in states]
+          p = [(trend - s) * pip_cost for s in states]
           for b in range(0,len(p)):
@@ -36,9 +64,11 @@
             position = 1
+        # 3(pの計算が違うだけ) : 利益確定をし新しい状態を持つ
         elif position == 2:
-          p = [(s - trend[t]) * pip_cost for s in states]
+          p = [(s - trend) * pip_cost for s in states]
           for b in p:
@@ -46,43 +76,129 @@
             pip.append(b)
-          states = [trend[t] + spread]
+          states = [trend + spread]
           position = 1
     elif action == 1:
+        # 1
         if position == 3:
+            states = [trend - spread]
+            position = 2
+        # 2
+        elif position == 2:
+          sub = 0
+          p = [(s - trend) * pip_cost for s in states]
+          for b in range(0,len(p)):
+            r = [-40.0, True] if p[b] <= -40 else [p[b], False]
+            if r[1]:
+              pip.append(r[0])
+              states.pop(b-sub)
+              sub += 1
+          states.append(trend - spread)
+          position = 2
+       # 3
+        elif position == 1:
+            p = [(trend - s) * pip_cost for s in states]
+            for b in p:
+              b = -40.0 if b <= -40 else b
+              pip.append(b)
             states = [trend[t] - spread]
             position = 2
+    return states,pip,position
+```
+```python
+'''
+一番目のコードと同様に番号を振る。
+速度アップを図るためにcythonを使用しています。
+'''
+def reward2(double trend,list pip,int action,int position,list states,double pip_cost,double spread):
+    cdef list r
+    cdef list p
+    cdef int sub = 0
+    # 2と3 : アクションの値によって式が異なるが同じ処理（1番目のコードと）
-        elif position == 2:
+   if position != 3:
-          sub = 0
+        if action == 0:
+            p = [(trend - s) * pip_cost for s in states]
+        else:
-          p = [(s - trend[t]) * pip_cost for s in states]
+            p = [(s - trend) * pip_cost for s in states]
+            spread *= -1
+        # positionの値をアクションと同様にしています。
+        # 2 : 前回のアクションと同様なら実行する。
+        if action == position:
-          for b in range(0,len(p)):
+            for b in range(0, len(p)):
+                # tl = 40を超えていれば損切とみなし、-40に固定する。
-            r = [-40.0, True] if p[b] <= -40 else [p[b], False]
+                r = True if p[b] <= -40 else False
+                # 発生している場合は該当の状態を削除する（2重評価を避けるため）
-            if r[1]:
+                if r:
-              pip.append(r[0])
+                    pip.append(-40)
-              states.pop(b-sub)
+                    states.pop(b - sub)
-              sub += 1
+                    sub += 1
-          states.append(trend[t] - spread)
+            states.append(trend + spread)
-          position = 2
+            position = action
-        elif position == 1:
+        # 3 : 前回のアクションと異なるなら実行する
-            p = [(trend[t] - s) * pip_cost for s in states]
+        else:
             for b in p:
@@ -90,9 +206,21 @@
               pip.append(b)
-            states = [trend[t] - spread]
+            states = [trend + spread]
-            position = 2
+            position = action
+    # 1 : time stepが0の時に実行する。
+    if position == 3:
+        states = [trend + spread] if action == 0 else [trend - spread]
+        position = action
     return states,pip,position
@@ -100,100 +228,188 @@
-```python
-def reward2(double trend,list pip,int action,int position,list states,double pip_cost,double spread):
-    cdef list r
-    cdef list p
-    cdef int sub = 0
-    # actionは0か1つまり、買うか売るという行動以外取りません
-    if position != 3:
-        if action == 0:
-            #buyなら購入した状態から現時点での価格があっがていることを期待するため
-            p = [(trend - s) * pip_cost for s in states]
-        else:
-            #buyの逆
-            p = [(s - trend) * pip_cost for s in states]
-            spread *= -1
-        if action == position:
-            # 損切が発生しているかどうかの確認
-            for b in range(0, len(p)):
-                r = [-40.0, True] if p[b] <= -40 else [p[b], False]
-                # 発生している場合は該当の状態を削除する（2重評価を避けるため）
-                if r[1]:
-                    pip.append(r[0])
-                    states.pop(b - sub)
-                    sub += 1
-            states.append(trend + spread)
-            position = action
-        else:
-            #前回とアクションが違うなら利確を実行し、新しい状態を持つ
-            for b in p:
-              b = -40.0 if b <= -40 else b
-              pip.append(b)
-            states = [trend + spread]
-            position = action
-    # taimusutepが0の時に状態を持つ
-    if position == 3:
-        states = [trend + spread] if action == 0 else [trend - spread]
-        position = action
-    return states,pip,position
 ```
-一番目のコードは無駄に長く、省けると思ったので短くしてみたのですが報酬の結果に違いが大幅に出てきてしまいました。
-画像を追加します。
-一番目のコードが以下の画像と似通った値になります。（最初は必ずマイナスその後は大体がプラス）
-![イメージ説明](7ff6b8892e05ebc54f704b6ace6e2669.jpeg)
-2番目のコードは最初は必ずプラスでその後がほぼ10epock程度しか確認していませんが、全てマイナスの結果になります。
-![イメージ説明](37e92dbb1b0ea52815ac88685dd2f018.jpeg)
+'''
+コードの比較
+１番目のコードの１
+if position == 3: # action == 0
+    states = [trend + spread]
+    position = 1
+if position == 3: # action == 1
+    states = [trend - spread]
+    position = 2
+2番目のコードの１
+elif position == 3:
+    states = [trend + spread] if action == 0 else [trend - spread] # action == 0 states = [trend + spread], action == 1 states = [trend - spread]
+    position = action
+1番目のコードの２
+elif position == 1: # action == 0
+    sub = 0
+    p = [(trend - s) * pip_cost for s in states]
+    for b in range(0,len(p)):
+    r = True if p[b] <= -40 else False
+    if r:
+        pip.append(-40)
+        states.pop(b-sub)
+        sub += 1
+    states.append(trend + spread)
+    position = 1
+elif position == 2: # action == 1
+  sub = 0
+  p = [(s - trend) * pip_cost for s in states]
+  for b in range(0,len(p)):
+    r = True if p[b] <= -40 else False
+    if r:
+      pip.append(-40)
+      states.pop(b-sub)
+      sub += 1
+  states.append(trend - spread)
+  position = 2
+2番目にコードの２と３
+if position != 3:
+    if action == 0:
+        p = [(trend - s) * pip_cost for s in states]
+    else:
+        p = [(s - trend) * pip_cost for s in states]
+        spread *= -1
+2番目のコードの2
+if action == position:
+    sub = 0
+    for b in range(0,len(p)):
+        r = True if p[b] <= -40 else False
+        if r:
+            pip.append(-40)
+            states.pop(b - sub)
+            sub += 1
+１番目のコードの３
+elif position == 2: # action == 0
+  p = [(s - trend) * pip_cost for s in states]
+  for b in p:
+    b = -40.0 if b <= -40 else b
+    pip.append(b)
+  states = [trend + spread]
+  states = [trend + spread]
+  position = 1
+elif position == 1: action == 1
+    p = [(trend - s) * pip_cost for s in states]
+    for b in p:
+        b = -40.0 if b <= -40 else b
+        pip.append(b)
+    states = [trend + spread]
+    position = 2
+２番目の３
+else:
+    for b in p:
+        b = -40.0 if b <= -40 else b
+        pip.append(b)
+    states = [trend + spread]
+    position = action
+'''
+```
+##画像
+1番目のコードの結果（何回繰り返しても最初は必ずマイナスになる。）
+![1番目のコード](4710e294bf8399948889242f6d5dc4cf.jpeg)
+2番目のコードの結果（何回繰り返しても最初は必ずプラスになる。）
+![イメージ説明](0a367287e4d7542fc19162515da2e244.jpeg)

画像追加しました

2019/08/17 21:08

投稿

退会済みユーザー

スコア0

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -185,3 +185,15 @@
 一番目のコードは無駄に長く、省けると思ったので短くしてみたのですが報酬の結果に違いが大幅に出てきてしまいました。
+画像を追加します。
+一番目のコードが以下の画像と似通った値になります。（最初は必ずマイナスその後は大体がプラス）
+![イメージ説明](7ff6b8892e05ebc54f704b6ace6e2669.jpeg)
+2番目のコードは最初は必ずプラスでその後がほぼ10epock程度しか確認していませんが、全てマイナスの結果になります。
+![イメージ説明](37e92dbb1b0ea52815ac88685dd2f018.jpeg)