翔泳社の「Kaggle データ分析」に則り、ボストン住宅価格のAI構築でLightGBMを(カテゴリー変数を入れて)使いたいのですが、エラーが出てしまいます。タイプミスではないようなのですが、どこで間違えてしまっているのでしょうか?
Google Colaboratory で進めています。
python
# House-price regression with LightGBM, using label-encoded categorical
# columns and 3-fold cross-validation (notebook-style script).

# Package imports
import random

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

# Plain-Python spelling of the `%matplotlib inline` notebook magic, so the
# cell also parses outside IPython's input transformer.
get_ipython().run_line_magic('matplotlib', 'inline')

# Fix the seeds so repeated runs give the same result.
np.random.seed(1234)
random.seed(1234)

folds = 3
kf = KFold(n_splits=folds)

# Load the data files
df_train = pd.read_csv('/content/drive/MyDrive/train_house.csv', encoding='cp932')
df_test = pd.read_csv('/content/drive/MyDrive/test_house.csv', encoding='cp932')
plt.style.use('ggplot')

# Inspect the data (bare expressions: only displayed when last in a cell)
df_train.shape
df_train.dtypes
df_train.head()
df_train['MSZoning'].value_counts()

# Stack train and test so each column is encoded with one shared mapping.
all_df = pd.concat([df_train, df_test], sort=False).reset_index(drop=True)
all_df['MSZoning'].value_counts()

# Every object-dtype column is treated as a categorical feature.
categolies = all_df.columns[all_df.dtypes == "object"]
categolies

# Every statement below must stay INSIDE the loop body. The reported
# ValueError complains that many columns are still non-numeric, which is
# consistent with these steps having run outside the loop (i.e. only once,
# for the final `cat`) -- NOTE(review): confirm against the notebook's
# indentation.
for cat in categolies:
    le = LabelEncoder()
    print(cat)
    # Assign the result back instead of using `inplace=True` on a column
    # selection, which may operate on a temporary copy.
    all_df[cat] = all_df[cat].fillna('missing')
    le = le.fit(all_df[cat])
    all_df[cat] = le.transform(all_df[cat])
    # The 'category' dtype is what the pandas input path accepts for
    # non-numeric features.
    all_df[cat] = all_df[cat].astype('category')
all_df[cat].dtypes

# Rows with a target are training data; rows without are the test set.
train_df_le = all_df[~all_df['SalePrice'].isnull()]
test_df_le = all_df[all_df['SalePrice'].isnull()]

lgbm_params = {
    'objective': 'regression',
    'random_seed': 1234,
}

train_x = train_df_le.drop(['SalePrice', 'Id'], axis=1)
train_y = train_df_le['SalePrice']

models = []
rmses = []
# Out-of-fold predictions, one slot per training row.
oof = np.zeros(len(train_x))

for train_index, val_index in kf.split(train_x):
    x_train = train_x.iloc[train_index]
    x_valid = train_x.iloc[val_index]
    y_train = train_y.iloc[train_index]
    y_valid = train_y.iloc[val_index]

    lgb_train = lgb.Dataset(x_train, y_train)
    lgb_eval = lgb.Dataset(x_valid, y_valid, reference=lgb_train)

    # The reported error is raised at this call.
    # NOTE(review): `early_stopping_rounds` / `verbose_eval` are keyword
    # arguments of older LightGBM releases; newer releases expect the
    # `callbacks=` argument instead -- confirm the installed version.
    model_lgb = lgb.train(
        lgbm_params,
        lgb_train,
        valid_sets=lgb_eval,
        num_boost_round=100,
        early_stopping_rounds=20,
        verbose_eval=10,
    )

    y_pred = model_lgb.predict(x_valid, num_iteration=model_lgb.best_iteration)
    # RMSE computed on log-transformed prices.
    tmp_rmse = np.sqrt(mean_squared_error(np.log(y_valid), np.log(y_pred)))
    print(tmp_rmse)

    models.append(model_lgb)
    # Was `emse.append(...)`: `emse` is never defined, so it raised NameError.
    rmses.append(tmp_rmse)
    oof[val_index] = y_pred
#エラーの内容#
ValueError: DataFrame.dtypes for data must be int, float or bool.
Did not expect the data types in fields MSZoning, Street, Alley, LotShape, LandContour, Utilities, LotConfig, LandSlope, Neighborhood, Condition1, Condition2, BldgType, HouseStyle, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, ExterQual, ExterCond, Foundation, BsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinType2, Heating, HeatingQC, CentralAir, Electrical, KitchenQual, Functional, FireplaceQu, GarageType, GarageFinish, GarageQual, GarageCond, PavedDrive, PoolQC, Fence, MiscFeature, SaleType
まだ回答がついていません
会員登録して回答してみよう