teratail header banner
teratail header banner
質問する | ログイン | 新規登録

質問編集履歴

1

ソースコードとエラーの追加

2019/10/06 14:57

投稿

rikubon_
rikubon_

スコア39

title CHANGED
File without changes
body CHANGED
@@ -3,15 +3,87 @@
3
3
  ### 発生している問題・エラーメッセージ
4
4
 
5
5
  ```
6
+ ValueError Traceback (most recent call last)
7
+ <ipython-input-106-badfaf7f9db2> in <module>
8
+ 2 lr = LogisticRegression()
6
- X has 4 features per sample; expecting 5
9
+ 3 lr.fit(x_train, y_train)
10
+ ----> 4 lr.predict(x_test)
7
11
 
12
+ /opt/conda/lib/python3.6/site-packages/sklearn/linear_model/base.py in predict(self, X)
13
+ 287 Predicted class label per sample.
14
+ 288 """
15
+ --> 289 scores = self.decision_function(X)
16
+ 290 if len(scores.shape) == 1:
17
+ 291 indices = (scores > 0).astype(np.int)
8
18
 
19
+ /opt/conda/lib/python3.6/site-packages/sklearn/linear_model/base.py in decision_function(self, X)
20
+ 268 if X.shape[1] != n_features:
21
+ 269 raise ValueError("X has %d features per sample; expecting %d"
22
+ --> 270 % (X.shape[1], n_features))
23
+ 271
24
+ 272 scores = safe_sparse_dot(X, self.coef_.T,
25
+
26
+ ValueError: X has 4 features per sample; expecting 5
27
+
28
+
29
+
30
+
9
31
  ```
10
32
 
11
33
  ### 該当のソースコード
12
34
 
13
35
  ```python
36
+ # 欠損値の補完
37
+ train_age_mean = train['Age'].mean()
38
+ train.fillna(value={'Age':train_age_mean}, inplace=True)
39
+ train['Age'] = train['Age'].astype(int)
40
+ # 特徴量の削除
41
+ train.drop('PassengerId', axis=1, inplace=True)
42
+ train.drop('Name', axis=1, inplace=True)
43
+ train.drop('Ticket', axis=1, inplace=True)
44
+ train.drop('Cabin', axis=1, inplace=True)
45
+ train.drop('Embarked', axis=1, inplace=True)
46
+ # 特徴量の値の変化
47
+ train.replace({'male':0, 'female':0}, inplace=True)
48
+ # 特徴量エンジニアリング
49
+ train['familysize'] = train['SibSp'] + train['Parch'] + 1
50
+ train.drop('SibSp', axis=1, inplace=True)
51
+ train.drop('Parch', axis=1, inplace=True)
52
+ #train['Fare'] = train['Fare'].astype(int)
53
+ train.drop(train.columns[np.isnan(train).any()], axis=1, inplace=True)
54
+
55
+ # 欠損値の補完
56
+ test_age_mean = test['Age'].mean()
57
+ test.fillna(value={'Age':test_age_mean}, inplace=True)
58
+ test['Age'] = test['Age'].astype(int)
59
+ # 特徴量の削除
60
+ test.drop('PassengerId', axis=1, inplace=True)
61
+ test.drop('Name', axis=1, inplace=True)
62
+ test.drop('Ticket', axis=1, inplace=True)
63
+ test.drop('Cabin', axis=1, inplace=True)
64
+ test.drop('Embarked', axis=1, inplace=True)
65
+ # 特徴量の値の変化
66
+ test.replace({'male':0, 'female':0}, inplace=True)
67
+ # 特徴量エンジニアリング
68
+ test['familysize'] = test['SibSp'] + test['Parch'] + 1
69
+ test.drop('SibSp', axis=1, inplace=True)
70
+ test.drop('Parch', axis=1, inplace=True)
71
+ #train['Fare'] = train['Fare'].astype(int)
72
+ test.drop(test.columns[np.isnan(test).any()], axis=1, inplace=True)
73
+
74
+ train
75
+
76
+ # 説明変数と目的変数の定義
77
+ train = train[train.columns[::-1]]
78
+ x_train = train.loc[:, :'Pclass']
79
+ y_train = train.loc[:, 'Survived']
80
+ x_test = test
14
- ソースコード
81
+ x_test
82
+
83
+ # モデルの作成
84
+ lr = LogisticRegression()
85
+ lr.fit(x_train, y_train)
86
+ lr.predict(x_test)
15
87
  ```
16
88
 
17
89
  ### 試したこと