質問編集履歴
1
情報の追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -219,3 +219,135 @@
|
|
219
219
|
なぜ最初のコードではstringがfloatに変換できないと言われ、
|
220
220
|
|
221
221
|
次のコードではndarray(dtype=int64)がstrに変換できないと言われるのでしょうか?どう直せば良いのでしょうか?
|
222
|
+
|
223
|
+
ちなみに、RandomForestClassifierを使用する手前までのコードは以下のとおりです。
|
224
|
+
|
225
|
+
```python
|
226
|
+
|
227
|
+
|
228
|
+
|
229
|
+
# coding: utf-8
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
# In[1]:
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
import pandas as pd
|
238
|
+
|
239
|
+
import matplotlib.pyplot as plt
|
240
|
+
|
241
|
+
from sklearn.ensemble import RandomForestClassifier
|
242
|
+
|
243
|
+
get_ipython().magic('matplotlib inline')
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
# In[2]:
|
250
|
+
|
251
|
+
|
252
|
+
|
253
|
+
df = pd.read_csv("Desktop/data/train.csv", delimiter=',')
|
254
|
+
|
255
|
+
print(df.head())
|
256
|
+
|
257
|
+
print(df.columns)
|
258
|
+
|
259
|
+
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
# In[3]:
|
264
|
+
|
265
|
+
|
266
|
+
|
267
|
+
mapping = {'male' : 0, 'female' : 1}
|
268
|
+
|
269
|
+
df.Sex = df.Sex.replace(mapping)
|
270
|
+
|
271
|
+
print(df.Sex)
|
272
|
+
|
273
|
+
df.replace("male",0).replace("female",1)
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
# In[4]:
|
280
|
+
|
281
|
+
|
282
|
+
|
283
|
+
df["Age"].fillna(df.Age.median(),inplace=True)
|
284
|
+
|
285
|
+
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
# In[5]:
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
split_data = []
|
294
|
+
|
295
|
+
for survived in [0,1]:
|
296
|
+
|
297
|
+
split_data.append(df[df.Survived==survived])
|
298
|
+
|
299
|
+
temp = [i["Pclass"].dropna() for i in split_data]
|
300
|
+
|
301
|
+
plt.hist(temp,histtype="barstacked",bins=3)
|
302
|
+
|
303
|
+
|
304
|
+
|
305
|
+
|
306
|
+
|
307
|
+
# In[6]:
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
temp = [i["Age"].dropna() for i in split_data]
|
312
|
+
|
313
|
+
plt.hist(temp, histtype="barstacked", bins=16)
|
314
|
+
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
# In[7]:
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
df["FamilySize"] = df["SibSp"] + df["Parch"] + 1
|
324
|
+
|
325
|
+
df2 = df.drop(["Name", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked"], axis=1)
|
326
|
+
|
327
|
+
|
328
|
+
|
329
|
+
|
330
|
+
|
331
|
+
# In[8]:
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
df2.head(10)
|
336
|
+
|
337
|
+
|
338
|
+
|
339
|
+
|
340
|
+
|
341
|
+
# In[9]:
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
train_data = df2.values
|
346
|
+
|
347
|
+
xs = train_data[:, 2:] # Pclass以降の変数
|
348
|
+
|
349
|
+
y = train_data[:, 1] # 正解データ
|
350
|
+
|
351
|
+
|
352
|
+
|
353
|
+
```
|