Python
1import pandas as pd 2from sklearn.model_selection import train_test_split 3from sklearn.tree import DecisionTreeClassifier 4#csvファイルの読み込み 5df = pd.read_csv('sasa_2019.csv', 6 encoding = "shift-jis", 7 skiprows = 1, 8 names = ['ID','menseki','totiriyou','0722RADER','0604RADER','0611RADER', 9 '0525A','0606A','0513A','R_avg','A_avg','rorn', 10 '0404N','0504N','0511N','0524N','1001N','1028N','1117N','0404R','0504R','0511R','0524R','1001R','1028R','1117R','0404G','0504G','0511G','0524G','1001G','1028G','1117G','0404B','0504B','0511B','0524B','1001B','1028B','1117B', 11 '0409N','0416N','0516N','0625N','1102N','1115N','0409R','0416R','0516R','0625R','1102R','1115R','0409G','0416G','0516G','0625G','1102G','1115G','0409B','0416B','0516B','0625B','1102B','1115B', 12 '0404Y','0504Y','0511Y','0524Y','1001Y','1028Y','1117Y','0409Y','0416Y','0516Y','0625Y','1102Y','1115Y','0712A','0724A','0805A','A7_avg','A7-A_avg', 13 '170623N','170720N','170809N','170829N','170926N','180715N','180928N','170623R','170720R','170809R','170829R','170926R','180715R','180928R','170623G','170720G','170809G','170829G','170926G','180715G','180928G','170623B','170720B','170809B','170829B','170926B','180715B','180928B', 14 "A8_avg","180804R","180804G","180804B","180804N", 15 '0404I1','0409I1','0416I1','0504I1','0511I1','0516I1','0524I1','170623I1','0625I1','180715I1','170720I1','170809I1','170829I1','170926I1','180928I1','1001I1','1028I1','1102I1','1115I1','1117I1','180804I1', 16 '170623Y','170720Y','170809Y','170829Y','170926Y','180715Y','180928Y','180804Y' 17 ] 18 ) 19 20 21#使わない列の削除 22#sasa=sasa.drop(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50','51','52','53','54','55'],axis = 1) 23sasa = df.dropna(how='all') 24 25X=sasa.loc[:,["A_avg","A8_avg"]] 26y=sasa['rorn' 27X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=1) 28 29clf = DecisionTreeClassifier(max_depth = 1) 30clf = clf.fit(X_train,y_train) 31predicted_y = clf.predict(X_test) 32score = accuracy_score(y_test, predicted_y) 33print("score =",score) 34
以上のコードを回すと、ValueErrorが発生してしまいました。
Python
1Traceback (most recent call last): 2 3 File "<ipython-input-3-0dbe063d4d7d>", line 1, in <module> 4 runfile('C:/Users/keikaku/Lesson3/sasa_dt.py', wdir='C:/Users/keikaku/Lesson3') 5 6 File "C:\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile 7 execfile(filename, namespace) 8 9 File "C:\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile 10 exec(compile(f.read(), filename, 'exec'), namespace) 11 12 File "C:/Users/keikaku/Lesson3/sasa_dt.py", line 72, in <module> 13 clf = clf.fit(X_train,y_train) 14 15 File "C:\Anaconda\lib\site-packages\sklearn\tree\tree.py", line 816, in fit 16 X_idx_sorted=X_idx_sorted) 17 18 File "C:\Anaconda\lib\site-packages\sklearn\tree\tree.py", line 131, in fit 19 y = check_array(y, ensure_2d=False, dtype=None) 20 21 File "C:\Anaconda\lib\site-packages\sklearn\utils\validation.py", line 542, in check_array 22 allow_nan=force_all_finite == 'allow-nan') 23 24 File "C:\Anaconda\lib\site-packages\sklearn\utils\validation.py", line 56, in _assert_all_finite 25 raise ValueError(msg_err.format(type_err, X.dtype)) 26 27ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
データを見てみると欠測値はないと思われるので、データが大きいというエラーでしょうか?
またこのエラーの解決方法が分かる方がおられましたら、回答よろしくお願いします。
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2020/02/08 03:15