python2.7でsklearnのプログラムを動かしますと、エラーが発生して、苦慮しております!
http://kamonohashiperry.com/archives/469 こちらの かものはし様のサイトでsklearnの勉強をさせて頂いていたところ、
ソースコードは、うまく動きますが、エラーが出てまいります。
# -*- coding: utf-8 -*- import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.model_selection import LeaveOneOut from sklearn.ensemble import RandomForestClassifier import scipy as sp import sklearn import seaborn as sns from matplotlib import pyplot as plt import sklearn.cross_validation wine_df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";") wine_df.head() #ワインの質に関する数値 Y = wine_df.quality.values #質に関するデータを落としている。 wine_df = wine_df.drop('quality', axis =1) #7よりも小さかったら0、それ以外は1とする。 Y = np.asarray([1 if i>=7 else 0 for i in Y]) wine_df.head() X = wine_df.as_matrix() from sklearn.ensemble import RandomForestClassifier from sklearn.cross_validation import cross_val_score scores =[] #1~41までの木の数のランダムフォレストを実行する。 for val in range(1,41): clf = RandomForestClassifier(n_estimators =val) validated = cross_val_score(clf, X, Y, cv =10) scores.append(validated) #木の数が2のランダムフォレストの結果を返す clf1 = RandomForestClassifier(n_estimators = 2) validated = cross_val_score(clf1, X, Y, cv=10) validated sns.boxplot(data=scores) plt.xlabel('number of trees') plt.ylabel('Classification scores') plt.title('Classification score for number of trees') plt.show() len_y = len(Y) temp = [i for i in Y if i ==0] temp_1 = temp.count(0) percentage = float(temp_1)/float(len_y) print(float(temp_1)/float(len_y)*100) sns.boxplot(data=scores) plt.axhline(y = percentage, ls = '--') plt.xlabel('number of trees') plt.ylabel('Classification Scores') plt.title('Classification scores of for trees') plt.show() scores = [] for val in range(1, 41): cfl = RandomForestClassifier(n_estimators = val) validated = cross_val_score(clf, X, Y, cv=10, scoring = 'f1') scores.append(validated) sns.boxplot( data=scores) plt.xlabel('number of trees') plt.ylabel('F1 Scores') plt.title('F1 scores as a function of the number of trees') plt.show() clf = RandomForestClassifier(n_estimators= 15) clf.fit(X, Y) (clf.predict_proba(X)[:,1] > 0.5).astype(int) def cutoff_predict(clf, X, cutoff): return (clf.predict_proba(X)[:,1] > cutoff).astype(int) scores = [] def custom_f1(cutoff): def f1_cutoff(clf, X, Y): ypred = cutoff_predict(clf, X, cutoff) return sklearn.metrics.f1_score(Y, ypred) return f1_cutoff for cutoff in np.arange(0.1, 0.9, 0.1): clf = RandomForestClassifier(n_estimators=15) validated = cross_val_score(clf, X, Y, cv=10, scoring=custom_f1(cutoff)) scores.append(validated) sns.boxplot(data=scores, names= np.arange(0.1, 0.9, 0.1)) plt.xlabel('each cut off value') plt.ylabel('F1 Scores') plt.title('custom F scores') plt.show() clf = RandomForestClassifier(n_estimators=15) clf.fit(X, Y) imp = clf.feature_importances_ names = wine_df.columns imp, names = zip(*sorted(zip(imp, names))) plt.barh(range(len(names)), imp, align='center') plt.yticks(range(len(names)), names) plt.xlabel('Importance of features') plt.ylabel('Features') plt.title('Importance of each feature') plt.show() from sklearn.tree import DecisionTreeClassifier import sklearn.linear_model import sklearn.svm def plot_decision_surface(clf, X_train, Y_train): plot_step=0.1 if X_train.shape[1] != 2: raise ValueError("X_train should have exactly 2 columns!") x_min, x_max = X_train[:, 0].min() - plot_step, X_train[:, 0].max() + plot_step y_min, y_max = X_train[:, 1].min() - plot_step, X_train[:, 1].max() + plot_step xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)) clf.fit(X_train, Y_train) if hasattr(clf, 'predict_proba'): Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:,1] else: Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) cs = plt.contourf(xx, yy, Z, cmap = plt.cm.Reds) plt.scatter(X_train[:,0], X_train[:,1], c=Y_train, cmap=plt.cm.Paired) plt.show() imp_fe = np.argsort(imp)[::-1][0:2] X_imp = X[:, imp_fe] algorithms = [DecisionTreeClassifier(), RandomForestClassifier(), sklearn.svm.SVC(C = 100.0, gamma = 1)] title = ['Decision Tree Classifier', 'Random Forest Classifier', 'Support Vector Maachine'] for i in xrange(3): plt.title(title[i]) plt.xlabel('Feature1') plt.ylabel('Feature2') plot_decision_surface(algorithms[i], X_imp, Y) svm = [sklearn.svm.SVC(C = 1.0, gamma = 1.0, class_weight=None), sklearn.svm.SVC(C = 1.0, gamma = 1.0, class_weight='auto')] title = ['Svm without class weight', 'Svm with class weight'] for i in xrange(2): plt.title(title[i]) plt.xlabel('Feature1') plt.ylabel('Feature2') plot_decision_surface(svm[i], X_imp, Y)
上記のソースコードを動かしますと、以下のようなエラーが出てまいります。
C:\Python27\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was de precated in version 0.18 in favor of the model_selection module into which all the refactored classe s and functions are moved. Also note that the interface of the new CV iterators are different from t hat of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning) 86.4290181363 C:\Python27\lib\site-packages\sklearn\metrics\classification.py:1113: UndefinedMetricWarning: F-scor e is ill-defined and being set to 0.0 due to no predicted samples. 'precision', 'predicted', average, warn_for) C:\Python27\lib\site-packages\seaborn\categorical.py:2171: UserWarning: The boxplot API has been cha nged. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info. warnings.warn(msg, UserWarning) C:\Python27\lib\site-packages\sklearn\utils\class_weight.py:65: DeprecationWarning: The class_weight ='auto' heuristic is deprecated in 0.17 in favor of a new heuristic class_weight='balanced'. 'auto' will be removed in 0.19 " 0.19", DeprecationWarning)
エラーコードを調べましたところ、http://qiita.com/kasajei/items/e23929627d51aa1b09fe
kasajei様のサイトにたどりつき、いろいろ試しましたが、うまく出来ませんでした。
バージョンの問題だということはわかるのですが、動かすたびにエラーが出てきて苦慮しております。
先輩方の御教示、よろしくお願いいたします。
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2017/07/03 10:28
2017/07/03 10:35