はじめてKaggleに登録して、titanicの生存者予測タスクにあたって、次のようなコードを書いてみました。
python
1import lightgbm as lgbm 2from sklearn.model_selection import train_test_split 3from sklearn.metrics import accuracy_score 4import pandas as pd 5import pandas_profiling as pdp 6import numpy as np 7import os 8from sklearn.linear_model import LogisticRegression 9from sklearn.ensemble import RandomForestClassifier 10train = pd.read_csv('../input/titanic/train.csv') 11test = pd.read_csv('../input/titanic/test.csv') 12train.head() 13# print('fuck') 14train= pd.get_dummies(train, columns=['Sex', 'Embarked']) 15test = pd.get_dummies(test, columns=['Sex', 'Embarked']) 16 17# train.head() 18train = train.drop(['PassengerId', 'Name', 'Cabin', 'Ticket'], axis=1) 19test = test.drop(['PassengerId', 'Name', 'Cabin', 'Ticket'], axis=1) 20 21x_train = train.drop(['Survived','Age'], axis=1) 22x_train = x_train.drop(x_train.columns[np.isnan(x_train).any()], axis=1) 23x_test = x_test.drop(x_test.columns[np.isnan(x_test).any()], axis=1) 24 25y_train = train['Survived'] 26x_test = train.drop('Survived', axis=1) 27 28model = RandomForestClassifier() 29model.fit(x_train, y_train) 30y_pred = model.predict(x_test) 31 32logreg.socre(x_train, y_train) 33
そうすると、以下のエラーメッセージがでます。下のコードでNANを削除しているのですが、
x_train = x_train.drop(x_train.columns[np.isnan(x_train).any()], axis=1)
Error
1--------------------------------------------------------------------------- 2ValueError Traceback (most recent call last) 3<ipython-input-18-b40df7e0435a> in <module> 4 31 x_test = train.drop('Survived', axis=1) 5 32 model.fit(x_train, y_train) 6---> 33 y_pred = model.predict(x_test) 7 34 8 35 logreg.socre(x_train, y_train) 9 10/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/forest.py in predict(self, X) 11 543 The predicted classes. 12 544 """ 13--> 545 proba = self.predict_proba(X) 14 546 15 547 if self.n_outputs_ == 1: 16 17/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/forest.py in predict_proba(self, X) 18 586 check_is_fitted(self, 'estimators_') 19 587 # Check data 20--> 588 X = self._validate_X_predict(X) 21 589 22 590 # Assign chunk of trees to jobs 23 24/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/forest.py in _validate_X_predict(self, X) 25 357 "call `fit` before exploiting the model.") 26 358 27--> 359 return self.estimators_[0]._validate_X_predict(X, check_input=True) 28 360 29 361 @property 30 31/opt/conda/lib/python3.6/site-packages/sklearn/tree/tree.py in _validate_X_predict(self, X, check_input) 32 389 """Validate X whenever one tries to predict, apply, predict_proba""" 33 390 if check_input: 34--> 391 X = check_array(X, dtype=DTYPE, accept_sparse="csr") 35 392 if issparse(X) and (X.indices.dtype != np.intc or 36 393 X.indptr.dtype != np.intc): 37 38/opt/conda/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator) 39 540 if force_all_finite: 40 541 _assert_all_finite(array, 41--> 542 allow_nan=force_all_finite == 'allow-nan') 42 543 43 544 if ensure_min_samples > 0: 44 45/opt/conda/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan) 46 54 not allow_nan and not np.isfinite(X).all()): 47 55 type_err = 'infinity' if allow_nan else 'NaN, infinity' 48---> 56 raise ValueError(msg_err.format(type_err, X.dtype)) 49 57 # for object dtype data, we only check for NaNs (GH-13254) 50 58 elif X.dtype == np.dtype('object') and not allow_nan: 51 52ValueError: Input contains NaN, infinity or a value too large for dtype('float32').
どうしたらよいでしょうか。

回答1件
あなたの回答
tips
プレビュー