Kaggle の Home Credit(url)に挑戦しています。
簡単に最低限必要な前処理だけしてサブミットしようとしていますが、エラーが解決できません。
普段は notebook でやっていますが、最後の行の train_test_split を実行した際にエラーが出ます。行数が合っていないというエラーのようですが、どうすればいいでしょうか。
発生している問題・エラーメッセージ
Found input variables with inconsistent numbers of samples: [307509, 307511]
該当のソースコード
python
# Script as posted in the question, kept behaviourally unchanged (including
# the bug that triggers the reported error).
# NOTE: `pd` is not imported in this snippet; presumably pandas was imported
# in an earlier notebook cell.
train = pd.read_csv('../input/application_train.csv')
test = pd.read_csv('../input/application_test.csv')

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le_count = 0  # never read again within this snippet

# Label-encode every object column that has at most two distinct values.
for col in train.columns:
    if not (train[col].dtype == 'object'):
        continue
    distinct_values = train[col].unique()
    if len(distinct_values) > 2:
        continue
    # Fit on the training data, then apply the same mapping to both tables.
    le.fit(train[col])
    train[col] = le.transform(train[col])
    test[col] = le.transform(test[col])

# NOTE: fillna() returns a new DataFrame and the result is discarded here,
# so neither table is actually modified by these two calls.
train.fillna(method="ffill")
test.fillna(method="ffill")

# One-hot encode the remaining categorical columns.
train = pd.get_dummies(train)
test = pd.get_dummies(test)

# NOTE(review): this is the line behind the reported error. With no `axis`,
# drop() treats the values of train['TARGET'] as row labels, so rows are
# removed instead of the TARGET column (presumably the rows labelled 0 and 1,
# which would explain 307509 vs 307511 samples).
train_X = train.drop(train['TARGET'])
y = train['TARGET']

train_X = train_X.drop(['TARGET'], axis=1)

import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 33% of the rows for validation.
train_x, valid_x, train_y, valid_y = train_test_split(
    train_X, y, test_size=0.33, random_state=0
)
# Corrected script: build the feature matrix and target from the same,
# unmodified set of rows so train_test_split receives inputs of equal length.
# NOTE: `pd` is not imported in this snippet; presumably pandas was imported
# in an earlier notebook cell.
train = pd.read_csv('../input/application_train.csv')
test = pd.read_csv('../input/application_test.csv')

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le_count = 0

# Iterate through the columns and label-encode the object columns that have
# two or fewer unique categories.
for col in train:
    if train[col].dtype == 'object':
        if len(train[col].unique()) <= 2:
            # Fit on the training data only, then transform both tables so
            # they share the same encoding.
            le.fit(train[col])
            train[col] = le.transform(train[col])
            test[col] = le.transform(test[col])

# Forward-fill missing values. The result must be assigned back: the earlier
# `train.fillna(method="ffill")` call returned a new DataFrame that was
# discarded, so nothing was filled. (This goes beyond the fix for the
# reported sample-count error.)
train = train.ffill()
test = test.ffill()

# One-hot encode the remaining categorical columns.
# NOTE(review): train and test are encoded separately, so their columns can
# differ if a category appears in only one table; align them before
# predicting on test.
train = pd.get_dummies(train)
test = pd.get_dummies(test)

# Target vector, taken from the full training table.
y = train['TARGET']

# Drop the TARGET *column* (axis=1). Passing train['TARGET'] to drop() without
# an axis would instead remove the rows whose index labels match its values,
# leaving train_X with fewer rows than y.
train_X = train.drop(['TARGET'], axis=1)

import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 33% of the rows for validation; fixed seed for reproducibility.
train_x, valid_x, train_y, valid_y = train_test_split(
    train_X, y, test_size=0.33, random_state=0
)
なお、変更点は以下の通りです。
diff
  train=pd.read_csv('../input/application_train.csv')
  test=pd.read_csv('../input/application_test.csv')

  from sklearn.preprocessing import LabelEncoder
  le = LabelEncoder()
  le_count = 0

  # Iterate through the columns
  for col in train:
      if train[col].dtype == 'object':
          # If 2 or fewer unique categories
          if len(list(train[col].unique())) <= 2:
              # Train on the training data
              le.fit(train[col])
              # Transform both training and testing data
              train[col] = le.transform(train[col])
              test[col] = le.transform(test[col])

  train.fillna(method="ffill")
  test.fillna(method="ffill")

  train = pd.get_dummies(train)
  test = pd.get_dummies(test)

- train_X=train.drop(train['TARGET'])
  y=train['TARGET']

- train_X=train_X.drop(['TARGET'],axis=1)
+ train_X=train.drop(['TARGET'],axis=1)

  import lightgbm as lgb

  from sklearn.model_selection import train_test_split
  from sklearn.metrics import accuracy_score

  train_x,valid_x,train_y,valid_y=train_test_split(train_X,y,test_size=0.33,random_state=0)
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。