前提・実現したいこと
KaggleのHouse Prices - Advanced Regression Techniquesで
XGBoostの回帰モデルで予測をしたいが、pandasのDataFrameをxgb.DMatrixに変換する際にエラーが発生する。
https://www.kaggle.com/c/house-prices-advanced-regression-techniques/overview/tutorials
学習・検証・テスト用のデータをxgb.DMatrixに変換する処理を実装中に、以下のエラーメッセージが発生しました。
発生している問題・エラーメッセージ
エラーメッセージ
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-42-86dc688ea310> in <module> ----> 1 dtrain = xgb.DMatrix(tr_x, label=tr_y) 2 dvalid = xgb.DMatrix(va_x, label = va_y) 3 dtest = xgb.DMatrix(test_x) ~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/core.py in inner_f(*args, **kwargs) 504 for k, arg in zip(sig.parameters, args): 505 kwargs[k] = arg --> 506 return f(**kwargs) 507 508 return inner_f ~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical) 614 return 615 --> 616 handle, feature_names, feature_types = dispatch_data_backend( 617 data, 618 missing=self.missing, ~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/data.py in dispatch_data_backend(data, missing, threads, feature_names, feature_types, enable_categorical) 705 return _from_tuple(data, missing, threads, feature_names, feature_types) 706 if _is_pandas_df(data): --> 707 return _from_pandas_df(data, enable_categorical, missing, threads, 708 feature_names, feature_types) 709 if _is_pandas_series(data): ~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/data.py in _from_pandas_df(data, enable_categorical, missing, nthread, feature_names, feature_types) 295 feature_types: Optional[List[str]], 296 ): --> 297 data, feature_names, feature_types = _transform_pandas_df( 298 data, enable_categorical, feature_names, feature_types) 299 return _from_numpy_array(data, missing, nthread, feature_names, ~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/data.py in _transform_pandas_df(data, enable_categorical, feature_names, feature_types, meta, meta_type) 240 categorical type is supplied, DMatrix parameter `enable_categorical` must 241 be set to 
`True`.""" --> 242 raise ValueError(msg + ', '.join(bad_fields)) 243 244 # handle feature names ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, DMatrix parameter `enable_categorical` must be set to `True`.LotFrontage, LotConfig, LandSlope, Neighborhood, Condition1, Condition2, BldgType, HouseStyle, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, MasVnrArea, ExterQual, ExterCond, Foundation, BsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinType2, Heating, HeatingQC, CentralAir, Electrical, KitchenQual, Functional, FireplaceQu, GarageType, GarageYrBlt, GarageFinish, GarageQual, GarageCond, PavedDrive
該当のソースコード
python
1import numpy as np 2import pandas as pd 3import xgboost as xgb 4from sklearn.preprocessing import LabelEncoder 5from sklearn.model_selection import KFold 6 7test = pd.read_csv("test.csv") 8train = pd.read_csv("train.csv") 9 10#train dataを目的変数と説明変数に分類する 11#目的変数はsales price 12train_x = train.drop(["SalePrice"], axis=1) 13train_y = train["SalePrice"] 14 15#test dataの説明変数の設定 16test_x = test.copy() 17 18#欠損値をmissという文字列に置き換える 19test_x = test_x.fillna("miss") 20train_x = train_x.fillna("miss") 21 22#ラベルエンコーディングの実施 23le = LabelEncoder() 24for column in ["MSZoning","Street","LotShape","LandContour","Fence","Fence","MiscFeature","SaleType","SaleCondition","PoolQC","Alley","Utilities"]: 25 le.fit(train_x[column]) 26 train_x[column] = le.transform(train_x[column]) 27 28for column in ["MSZoning","Street","LotShape","LandContour","Fence","Fence","MiscFeature","SaleType","SaleCondition","PoolQC","Alley","Utilities"]: 29 le.fit(test_x[column]) 30 test_x[column] = le.transform(test_x[column]) 31 32#テストデータとバリデーションデータに分類する 33kf = KFold(n_splits=4, shuffle=True, random_state=71) 34for tr_idx, va_idx in kf.split(train_x): 35 tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx] 36 tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx] 37 38dtrain = xgb.DMatrix(tr_x, label=tr_y) 39dvalid = xgb.DMatrix(va_x, label = va_y) 40dtest = xgb.DMatrix(test_x) 41 42---------エラー発生--------------- 43--------------------------------------------------------------------------- 44ValueError Traceback (most recent call last) 45<ipython-input-42-86dc688ea310> in <module> 46----> 1 dtrain = xgb.DMatrix(tr_x, label=tr_y) 47 2 dvalid = xgb.DMatrix(va_x, label = va_y) 48 3 dtest = xgb.DMatrix(test_x) 49 50~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/core.py in inner_f(*args, **kwargs) 51 504 for k, arg in zip(sig.parameters, args): 52 505 kwargs[k] = arg 53--> 506 return f(**kwargs) 54 507 55 508 return inner_f 56 
57~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical) 58 614 return 59 615 60--> 616 handle, feature_names, feature_types = dispatch_data_backend( 61 617 data, 62 618 missing=self.missing, 63 64~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/data.py in dispatch_data_backend(data, missing, threads, feature_names, feature_types, enable_categorical) 65 705 return _from_tuple(data, missing, threads, feature_names, feature_types) 66 706 if _is_pandas_df(data): 67--> 707 return _from_pandas_df(data, enable_categorical, missing, threads, 68 708 feature_names, feature_types) 69 709 if _is_pandas_series(data): 70 71~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/data.py in _from_pandas_df(data, enable_categorical, missing, nthread, feature_names, feature_types) 72 295 feature_types: Optional[List[str]], 73 296 ): 74--> 297 data, feature_names, feature_types = _transform_pandas_df( 75 298 data, enable_categorical, feature_names, feature_types) 76 299 return _from_numpy_array(data, missing, nthread, feature_names, 77 78~/miniforge3/envs/arm_tenso_env/lib/python3.8/site-packages/xgboost/data.py in _transform_pandas_df(data, enable_categorical, feature_names, feature_types, meta, meta_type) 79 240 categorical type is supplied, DMatrix parameter `enable_categorical` must 80 241 be set to `True`.""" 81--> 242 raise ValueError(msg + ', '.join(bad_fields)) 82 243 83 244 # handle feature names 84 85ValueError: DataFrame.dtypes for data must be int, float, bool or category. 
When 86 categorical type is supplied, DMatrix parameter `enable_categorical` must 87 be set to `True`.LotFrontage, LotConfig, LandSlope, Neighborhood, Condition1, Condition2, BldgType, HouseStyle, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, MasVnrArea, ExterQual, ExterCond, Foundation, BsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinType2, Heating, HeatingQC, CentralAir, Electrical, KitchenQual, Functional, FireplaceQu, GarageType, GarageYrBlt, GarageFinish, GarageQual, GarageCond, PavedDrive 88
試したこと
DMatrixに変換せずにそのまま学習・予測しようとしたが、fitの際に同様のエラーが発生した。
補足情報(FW/ツールのバージョンなど)
Python 3.8(miniforge3のconda環境 arm_tenso_env)上で、xgboost・pandas・scikit-learnを使用しています(各ライブラリのバージョンは未記載)。
回答1件
あなたの回答
tips
プレビュー