Pythonで機械学習を行っているのですが、エラーがでてしまい困っています

前提・実現したいこと

機械学習で為替の予測をしているのですが、特徴量であるADXRを用いて学習をさせようとしたら、エラーメッセージがでてしまい先に進めることができません。

発生している問題・エラーメッセージ

ValueError                                Traceback (most recent call last)
<ipython-input-19-82ba81b7b0d8> in <module>
    135 #訓練用の説明変数と正解クラス、評価用の説明変数と正解クラスに分割
    136 for i in range(10):
--> 137     X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8)
    138 
    139     clf = RandomForestClassifier(max_depth=30,n_estimators=50)

~\miniconda3\lib\site-packages\sklearn\model_selection\_split.py in train_test_split(*arrays, **options)
   2129     n_samples = _num_samples(arrays[0])
   2130     n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,
-> 2131                                               default_test_size=0.25)
   2132 
   2133     if shuffle is False:

~\miniconda3\lib\site-packages\sklearn\model_selection\_split.py in _validate_shuffle_split(n_samples, test_size, train_size, default_test_size)
   1812             'resulting train set will be empty. Adjust any of the '
   1813             'aforementioned parameters.'.format(n_samples, test_size,
-> 1814                                                 train_size)
   1815         )
   1816 

ValueError: With n_samples=0, test_size=None and train_size=0.8, the resulting train set will be empty. Adjust any of the aforementioned parameters.

該当のソースコード

Python3
1
2import numpy as np 
3import pandas as pd
4import talib as ta
5import matplotlib.pyplot as plt
6
7df = pd.read_csv('USD_JPY 10-14.csv')
8
9
10open = np.array(df["始値"])
11close = np.array(df["終値"])
12high = np.array(df["高値"])
13low = np.array(df["安値"])
14
15#特徴量を格納するデータフレーム
16df_feature = pd.DataFrame(index=range(len(df)),columns=["ADXR","APO","AROON","AROONOSC","BOP","CCI","CMO","DX","MACDEXT","MACDFIX","MFI","MINUS_DI","MINUS_DM","MOM","PPO"])
17
18
19#MACD
20df_feature["MACD"],_,_ = ta.MACD(close,fastperiod = 12,slowperiod = 26,signalperiod = 9)
21
22#ボリンジャーバンド
23upper,middle,lower = ta.BBANDS(close,timeperiod = 5,nbdevup = 2,nbdevdn = 2,matype = 0)
24df_feature["BBANDS+2σ"] = upper/close
25df_feature["BBANDS-2σ"] = lower/close
26
27#RSI
28df_feature["RSI"] = ta.RSI(close, timeperiod=14)
29
30#SMA5/current
31df_feature["SMA5/current"]= ta.SMA(close, timeperiod=5) / close
32df_feature["SMA20/current"]= ta.SMA(close, timeperiod=20) / close
33
34#TEMA
35df_feature["TEMA"]= ta.TEMA(close, timeperiod=30)
36
37#WMA
38df_feature["WMA"]= ta.WMA(close, timeperiod=30)
39
40#MIDPOINT
41df_feature["MIDPOINT"]= ta.MIDPOINT(close, timeperiod=14)
42
43#ADX
44df_feature["ADX"]= ta.ADX(high,low,close, timeperiod=14)
45
46#DEMA
47df_feature["DEMA"]= ta.DEMA(close, timeperiod=30)
48
49#EMA
50df_feature["EMA"]= ta.EMA(close, timeperiod=30)
51
52#HT_TRENDLINE
53df_feature["HT_TRENDLINE"]= ta.HT_TRENDLINE(close)
54
55#KAMA
56df_feature["KAMA"]= ta.KAMA(close, timeperiod=30)
57
58#MA
59df_feature["MA"]= ta.MA(close, timeperiod=30, matype=0)
60
61#MIDPRICE
62df_feature["MIDPRICE"]= ta.MIDPRICE(high, low, timeperiod=14)
63
64#SAR
65df_feature["SAR"]= ta.SAR(high, low, acceleration=0, maximum=0)
66
67#ADXR
68df_feature["ADXR"]= ta.ADXR(high, low, close, timeperiod=14)
69
70#APO
71df_feature["APO"]= ta.APO(close, fastperiod=12, slowperiod=26, matype=0)
72
73#AROON
74#df_feature["AROON"]= ta.AROON(high, low, timeperiod=14)
75
76#AROONOSC
77df_feature["AROONOSC"]= ta.AROONOSC(high, low, timeperiod=14)
78
79#BOP
80df_feature["BOP"]= ta.BOP(open, high, low, close)
81
82#CCI
83df_feature["CCI"]= ta.CCI(high, low, close, timeperiod=14)
84
85#CMO
86df_feature["CMO"]= ta.CMO(close, timeperiod=14)
87
88#DX
89df_feature["DX"]= ta.DX(high, low, close, timeperiod=14)
90
91#MACDEXT
92#df_feature["MACDEXT"]= ta.MACDEXT(close, fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
93
94#MACDFIX
95#df_feature["MACDFIX"]= ta.MACDFIX(close, signalperiod=9)
96
97#MFI
98#df_feature["MFI"]= ta.MFI(high, low, close, volume, timeperiod=14)
99
100
101#教師データ
102df["前日比_float"] = df["前日比%"].apply(lambda x: float(x.replace("%","")))
103
104def classify(x):
105    if x <= -0.2:
106        return 0
107    
108    elif -0.2 < x < 0.2:
109        return 1
110    
111    elif 0.2 <= x:
112        return 2
113    
114df["前日比_classified"] = df["前日比_float"].apply(lambda x: classify(x))
115
116df_y = df["前日比_classified"].shift()
117
118df_xy = pd.concat([df_feature, df_y],axis = 1)
119
120df_xy = df_xy.dropna(how = "any")
121
122
123from sklearn.model_selection import train_test_split
124from sklearn.ensemble import RandomForestClassifier
125from sklearn.metrics import accuracy_score
126import optuna
127
128list = []
129list2 = []
130list3 = []
131sum = 0
132sum2 = 0
133sum3 = 0
134
135#訓練用の説明変数と正解クラス、評価用の説明変数と正解クラスに分割 
136for i in range(10):
137    X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8)
138
139    clf = RandomForestClassifier(max_depth=30,n_estimators=50)
140    clf.fit(X_train, Y_train)
141
142    print("訓練セットの精度"+str(i+1)+"回目: {:.3f}".format(clf.score(X_train, Y_train)))
143    print("テストセットの精度"+str(i+1)+"回目: {:.3f}".format(clf.score(X_test, Y_test)))
144    sum += round(clf.score(X_test, Y_test),3)
145    list.insert(i,round(clf.score(X_test, Y_test),3)*100)
146    
147print(list)
148print("合計：",sum)
149ave = round(sum/10,3)
150print("平均：",ave)
151
152for j in range(10):
153    X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8)
154
155    clf = RandomForestClassifier(max_depth=30,n_estimators=50)
156    clf.fit(X_train, Y_train)
157
158    print("訓練セットの精度"+str(j+1)+"回目: {:.3f}".format(clf.score(X_train, Y_train)))
159    print("テストセットの精度"+str(j+1)+"回目: {:.3f}".format(clf.score(X_test, Y_test)))
160    sum2 += round(clf.score(X_test, Y_test),3)
161    list2.insert(j,round(clf.score(X_test, Y_test),3)*100)
162
163print(list2)
164print("合計：",sum2)
165ave2 = round(sum2/10,3)
166print("平均：",ave2)
167
168for k in range(10):
169    X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8)
170
171    clf = RandomForestClassifier(max_depth=30,n_estimators=50)
172    clf.fit(X_train, Y_train)
173
174    print("訓練セットの精度"+str(k+1)+"回目: {:.3f}".format(clf.score(X_train, Y_train)))
175    print("テストセットの精度"+str(k+1)+"回目: {:.3f}".format(clf.score(X_test, Y_test)))
176    sum3 += round(clf.score(X_test, Y_test),3)
177    list3.insert(k,round(clf.score(X_test, Y_test),3)*100)
178
179print(list3)
180print("合計：",sum3)
181ave3 = round(sum3/10,3)
182print("平均：",ave3)
183
184
185alist = (list,list2,list3)
186fig, ax = plt.subplots()
187bp = ax.boxplot(alist)
188ax.set_xticklabels(['30:50','30:100','30:150'])
189
190plt.title('2010-2014')
191plt.xlabel('parameter(max_depth:estimators)')
192plt.ylabel('accuracy')
193
194plt.ylim([20,50])
195plt.grid()
196