前提・実現したいこと
機械学習で為替の予測をしているのですが、特徴量であるADXRを用いて学習をさせようとしたら、エラーメッセージがでてしまい先に進めることができません。
発生している問題・エラーメッセージ
ValueError Traceback (most recent call last) <ipython-input-19-82ba81b7b0d8> in <module> 135 #訓練用の説明変数と正解クラス、評価用の説明変数と正解クラスに分割 136 for i in range(10): --> 137 X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8) 138 139 clf = RandomForestClassifier(max_depth=30,n_estimators=50) ~\miniconda3\lib\site-packages\sklearn\model_selection\_split.py in train_test_split(*arrays, **options) 2129 n_samples = _num_samples(arrays[0]) 2130 n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size, -> 2131 default_test_size=0.25) 2132 2133 if shuffle is False: ~\miniconda3\lib\site-packages\sklearn\model_selection\_split.py in _validate_shuffle_split(n_samples, test_size, train_size, default_test_size) 1812 'resulting train set will be empty. Adjust any of the ' 1813 'aforementioned parameters.'.format(n_samples, test_size, -> 1814 train_size) 1815 ) 1816 ValueError: With n_samples=0, test_size=None and train_size=0.8, the resulting train set will be empty. Adjust any of the aforementioned parameters.
該当のソースコード
Python3
1 2import numpy as np 3import pandas as pd 4import talib as ta 5import matplotlib.pyplot as plt 6 7df = pd.read_csv('USD_JPY 10-14.csv') 8 9 10open = np.array(df["始値"]) 11close = np.array(df["終値"]) 12high = np.array(df["高値"]) 13low = np.array(df["安値"]) 14 15#特徴量を格納するデータフレーム 16df_feature = pd.DataFrame(index=range(len(df)),columns=["ADXR","APO","AROON","AROONOSC","BOP","CCI","CMO","DX","MACDEXT","MACDFIX","MFI","MINUS_DI","MINUS_DM","MOM","PPO"]) 17 18 19#MACD 20df_feature["MACD"],_,_ = ta.MACD(close,fastperiod = 12,slowperiod = 26,signalperiod = 9) 21 22#ボリンジャーバンド 23upper,middle,lower = ta.BBANDS(close,timeperiod = 5,nbdevup = 2,nbdevdn = 2,matype = 0) 24df_feature["BBANDS+2σ"] = upper/close 25df_feature["BBANDS-2σ"] = lower/close 26 27#RSI 28df_feature["RSI"] = ta.RSI(close, timeperiod=14) 29 30#SMA5/current 31df_feature["SMA5/current"]= ta.SMA(close, timeperiod=5) / close 32df_feature["SMA20/current"]= ta.SMA(close, timeperiod=20) / close 33 34#TEMA 35df_feature["TEMA"]= ta.TEMA(close, timeperiod=30) 36 37#WMA 38df_feature["WMA"]= ta.WMA(close, timeperiod=30) 39 40#MIDPOINT 41df_feature["MIDPOINT"]= ta.MIDPOINT(close, timeperiod=14) 42 43#ADX 44df_feature["ADX"]= ta.ADX(high,low,close, timeperiod=14) 45 46#DEMA 47df_feature["DEMA"]= ta.DEMA(close, timeperiod=30) 48 49#EMA 50df_feature["EMA"]= ta.EMA(close, timeperiod=30) 51 52#HT_TRENDLINE 53df_feature["HT_TRENDLINE"]= ta.HT_TRENDLINE(close) 54 55#KAMA 56df_feature["KAMA"]= ta.KAMA(close, timeperiod=30) 57 58#MA 59df_feature["MA"]= ta.MA(close, timeperiod=30, matype=0) 60 61#MIDPRICE 62df_feature["MIDPRICE"]= ta.MIDPRICE(high, low, timeperiod=14) 63 64#SAR 65df_feature["SAR"]= ta.SAR(high, low, acceleration=0, maximum=0) 66 67#ADXR 68df_feature["ADXR"]= ta.ADXR(high, low, close, timeperiod=14) 69 70#APO 71df_feature["APO"]= ta.APO(close, fastperiod=12, slowperiod=26, matype=0) 72 73#AROON 74#df_feature["AROON"]= ta.AROON(high, low, timeperiod=14) 75 76#AROONOSC 77df_feature["AROONOSC"]= ta.AROONOSC(high, low, timeperiod=14) 78 79#BOP 80df_feature["BOP"]= ta.BOP(open, high, low, close) 81 82#CCI 83df_feature["CCI"]= ta.CCI(high, low, close, timeperiod=14) 84 85#CMO 86df_feature["CMO"]= ta.CMO(close, timeperiod=14) 87 88#DX 89df_feature["DX"]= ta.DX(high, low, close, timeperiod=14) 90 91#MACDEXT 92#df_feature["MACDEXT"]= ta.MACDEXT(close, fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0) 93 94#MACDFIX 95#df_feature["MACDFIX"]= ta.MACDFIX(close, signalperiod=9) 96 97#MFI 98#df_feature["MFI"]= ta.MFI(high, low, close, volume, timeperiod=14) 99 100 101#教師データ 102df["前日比_float"] = df["前日比%"].apply(lambda x: float(x.replace("%",""))) 103 104def classify(x): 105 if x <= -0.2: 106 return 0 107 108 elif -0.2 < x < 0.2: 109 return 1 110 111 elif 0.2 <= x: 112 return 2 113 114df["前日比_classified"] = df["前日比_float"].apply(lambda x: classify(x)) 115 116df_y = df["前日比_classified"].shift() 117 118df_xy = pd.concat([df_feature, df_y],axis = 1) 119 120df_xy = df_xy.dropna(how = "any") 121 122 123from sklearn.model_selection import train_test_split 124from sklearn.ensemble import RandomForestClassifier 125from sklearn.metrics import accuracy_score 126import optuna 127 128list = [] 129list2 = [] 130list3 = [] 131sum = 0 132sum2 = 0 133sum3 = 0 134 135#訓練用の説明変数と正解クラス、評価用の説明変数と正解クラスに分割 136for i in range(10): 137 X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8) 138 139 clf = RandomForestClassifier(max_depth=30,n_estimators=50) 140 clf.fit(X_train, Y_train) 141 142 print("訓練セットの精度"+str(i+1)+"回目: {:.3f}".format(clf.score(X_train, Y_train))) 143 print("テストセットの精度"+str(i+1)+"回目: {:.3f}".format(clf.score(X_test, Y_test))) 144 sum += round(clf.score(X_test, Y_test),3) 145 list.insert(i,round(clf.score(X_test, Y_test),3)*100) 146 147print(list) 148print("合計:",sum) 149ave = round(sum/10,3) 150print("平均:",ave) 151 152for j in range(10): 153 X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8) 154 155 clf = RandomForestClassifier(max_depth=30,n_estimators=50) 156 clf.fit(X_train, Y_train) 157 158 print("訓練セットの精度"+str(j+1)+"回目: {:.3f}".format(clf.score(X_train, Y_train))) 159 print("テストセットの精度"+str(j+1)+"回目: {:.3f}".format(clf.score(X_test, Y_test))) 160 sum2 += round(clf.score(X_test, Y_test),3) 161 list2.insert(j,round(clf.score(X_test, Y_test),3)*100) 162 163print(list2) 164print("合計:",sum2) 165ave2 = round(sum2/10,3) 166print("平均:",ave2) 167 168for k in range(10): 169 X_train,X_test,Y_train,Y_test = train_test_split(df_xy[["ADXR"]],df_xy["前日比_classified"],train_size=0.8) 170 171 clf = RandomForestClassifier(max_depth=30,n_estimators=50) 172 clf.fit(X_train, Y_train) 173 174 print("訓練セットの精度"+str(k+1)+"回目: {:.3f}".format(clf.score(X_train, Y_train))) 175 print("テストセットの精度"+str(k+1)+"回目: {:.3f}".format(clf.score(X_test, Y_test))) 176 sum3 += round(clf.score(X_test, Y_test),3) 177 list3.insert(k,round(clf.score(X_test, Y_test),3)*100) 178 179print(list3) 180print("合計:",sum3) 181ave3 = round(sum3/10,3) 182print("平均:",ave3) 183 184 185alist = (list,list2,list3) 186fig, ax = plt.subplots() 187bp = ax.boxplot(alist) 188ax.set_xticklabels(['30:50','30:100','30:150']) 189 190plt.title('2010-2014') 191plt.xlabel('parameter(max_depth:estimators)') 192plt.ylabel('accuracy') 193 194plt.ylim([20,50]) 195plt.grid() 196
試したこと
ネットで調べたのですが詳しいことがわからず、困っています。
原因を教えてもらえないでしょうか?
よろしくお願いします。
補足情報(FW/ツールのバージョンなど)
ここにより詳細な情報を記載してください。
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2021/07/26 16:51