質問編集履歴
1
コード全体の記述を行いました。
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Kaggleコンペ 住宅価格予想について、エラーの原因が特定できません。
|
1
|
+
```Kaggleコンペ 住宅価格予想について、エラーの原因が特定できません。
|
2
2
|
|
3
3
|
AIacademyというサイトのコードとカリキュラムを参考にしています。
|
4
4
|
|
@@ -8,7 +8,215 @@
|
|
8
8
|
|
9
9
|
|
10
10
|
|
11
|
-
|
11
|
+
```ここに言語を入力 Python
|
12
|
+
|
13
|
+
コード
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
#データの読み込み
|
18
|
+
|
19
|
+
import numpy as np
|
20
|
+
|
21
|
+
import pandas as pd
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
train= pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/train.csv")
|
26
|
+
|
27
|
+
test = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/test.csv")
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
train.head()
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
train.info()
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
print(train.shape,test.shape)
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
#欠損値の多いデータを消す
|
46
|
+
|
47
|
+
train = train.drop('Alley',axis=1).drop('FireplaceQu',axis=1).drop('PoolQC',axis=1).drop('Fence',axis=1).drop('MiscFeature',axis=1)
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
test =test.drop('Alley',axis=1).drop('FireplaceQu',axis=1).drop('PoolQC',axis=1).drop('Fence',axis=1).drop('MiscFeature',axis=1)
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
train_id = train['Id']
|
58
|
+
|
59
|
+
test_id = test['Id']
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
y_train = train['SalePrice']
|
64
|
+
|
65
|
+
x_train = train.drop(['Id','SalePrice'],axis=1)
|
66
|
+
|
67
|
+
x_test = test.drop('Id',axis=1)
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
#欠損値を中央値で補完
|
74
|
+
|
75
|
+
x_train = x_train.fillna(x_train.median())
|
76
|
+
|
77
|
+
x_test = x_test.fillna(x_test.median())
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
x_train.info()
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
#object型の欠損値をmodeで埋める
|
86
|
+
|
87
|
+
for i in range(x_train.shape[1]):
|
88
|
+
|
89
|
+
if x_train.iloc[:,i].dtype == object:
|
90
|
+
|
91
|
+
mode = x_train.mode()[x_train.columns.values[i]].values
|
92
|
+
|
93
|
+
for j in range(x_train.shape[0]):
|
94
|
+
|
95
|
+
if x_train.isnull().iloc[j,i]==True:
|
96
|
+
|
97
|
+
x_train.iloc[j,i] =mode
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
for i in range(x_test.shape[1]):
|
104
|
+
|
105
|
+
if x_test.iloc[:,i].dtype == object:
|
106
|
+
|
107
|
+
mode = x_test.mode()[x_test.columns.values[i]].values
|
108
|
+
|
109
|
+
for j in range(x_test.shape[0]):
|
110
|
+
|
111
|
+
if x_test.isnull().iloc[j,i]==True:
|
112
|
+
|
113
|
+
x_test.iloc[j,i] = mode
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
x_train.isnull().sum().sum()
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
#ラベルエンコーディング
|
124
|
+
|
125
|
+
from sklearn.preprocessing import LabelEncoder
|
126
|
+
|
127
|
+
le = LabelEncoder()
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
#ラベルエンコーダー(訓練セット)
|
132
|
+
|
133
|
+
for i in range(x_train.shape[1]):
|
134
|
+
|
135
|
+
if x_train.iloc[:,i].dtypes == object:
|
136
|
+
|
137
|
+
le.fit(list(x_train[x_train.columns.values[i]].values))
|
138
|
+
|
139
|
+
x_train[x_train.columns.values[i]] = le.transform(list(x_train[x_train.columns.values[i]].values))
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
#ラベルエンコーダー(テストセット)
|
144
|
+
|
145
|
+
for i in range(x_test.shape[1]):
|
146
|
+
|
147
|
+
if x_test.iloc[:,i].dtypes == object:
|
148
|
+
|
149
|
+
le.fit(list(x_test[x_test.columns.values[i]].values))
|
150
|
+
|
151
|
+
x_test[x_test.columns.values[i]] = le.transform(list(x_test[x_test.columns.values[i]].values))
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
x_train.info()
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
#特徴量の削減
|
162
|
+
|
163
|
+
from sklearn.feature_selection import SelectKBest,f_regression
|
164
|
+
|
165
|
+
|
166
|
+
|
167
|
+
selector = SelectKBest(score_func=f_regression,k=5)
|
168
|
+
|
169
|
+
selector.fit(x_train,y_train)
|
170
|
+
|
171
|
+
print(selector.get_support())
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
x_train_selected =pd.DataFrame({'OverallQual':x_train['OverallQual'],'ExterQual':x_train['ExterQual'],'GrLivArea':x_train['GrLivArea'],'GarageCars':x_train['GarageCars'],'GarageArea':x_train['GarageArea']})
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
x_test_selected = pd.DataFrame({'OverallQual':x_test['OverallQual'],'ExterQual':x_test['ExterQual'],'GrLivArea':x_test['GrLivArea'],'GarageCars':x_test['GarageCars'],'GarageArea':x_test['GarageArea']})
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
x_train_selected.head()
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
#訓練セットを分ける
|
190
|
+
|
191
|
+
from sklearn.model_selection import train_test_split
|
192
|
+
|
193
|
+
xp_train,xp_test,yp_train,yp_test = train_test_split(x_train_selected,y_train,test_size=0.3,random_state=1)
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
#ランダムフォレストとサポートベクター回帰(SVR)を読み込み、グリッドサーチを行う
|
198
|
+
|
199
|
+
from sklearn.svm import SVR
|
200
|
+
|
201
|
+
from sklearn.ensemble import RandomForestRegressor
|
202
|
+
|
203
|
+
|
204
|
+
|
205
|
+
forest = RandomForestRegressor
|
206
|
+
|
207
|
+
svr =SVR()
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
parameters_forest = {'n_estimators':[100,500,1000,3000],'max-depth':[3,6,12]}
|
212
|
+
|
213
|
+
parameters_svr = {'C':[0.1,10,1000],'epsilzon':[0.01,0.1,0.5]}
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
|
218
|
+
|
219
|
+
#グリッドサーチを行う(エラー発生)
|
12
220
|
|
13
221
|
from sklearn.model_selection import GridSearchCV
|
14
222
|
|
@@ -84,6 +292,8 @@
|
|
84
292
|
|
85
293
|
|
86
294
|
|
87
|
-
初心者で対処の仕方など、よくわかってないので試したことは特にありません。
|
295
|
+
初心者で対処の仕方など、よくわかってないので試したことは特にありません。
|
296
|
+
|
297
|
+
コードにスペルミスがないか、コード全体のスペルチェックを2度行っています。
|
88
298
|
|
89
299
|
このエラーがどのような意味なのか、また対処法を知っている方がいましたら、回答をよろしくお願いします。
|