1.前提・実現したいこと
optunaで、lightGBMのハイパラメータチューニングを行っています。
3.で示したコードで、エラーが発生します。対処方法をお願いします。
2.発生している問題・エラーメッセージ
3.のソースコード実行(最終行のコード)で下記のエラーメッセージが発生
-> 1377 raise KeyError(f"{not_found} not in index")
1378
1379
KeyError: '[1, 7, 16, 20, 22, 44, 45, 60, 64, 65, 69, 70, 75, 81, 85, 89, 94, 97, 99, 101, 118, 120, 121, 125, 134, 141, 144, 162, 171, 174, 179, 187, 191, 196, 198, 204, 209, 214, 224, 226, 238, 245, 260, 262, 282, 283, 296, 306, 341, 349, 358, 359, 362, 363, 375, 381, 389, 392, 402, 403, 407, 410, 412, 414, 415, 421, 428, 429, 430, 439, 449, 451, 461, 470, 472, 485, 489, 496, 499, 501, 504, 510, 513, 516, 528, 531, 534, 538, 546, 553, 554, 555, 564, 565, 572, 578, 585, 586, 588, 593, 597, 617, 637, 641, 646, 663, 682, 687, 697, 705, 716, 719, 720, 725, 729, 734, 737, 747, 754, 755, 759, 766, 770, 778, 794, 797, 799, 800, 804, 806, 807, 811, 818, 820, 824, 833, 835, 838, 839, 846, 848, 860, 869, 871, 897, 911, 919, 923, 931, 937, 948, 949, 951, 955, 962, 977, 995, 1001, 1004, 1008, 1009, 1028, 1032, 1037, 1052, 1056, 1059, 1064, 1073, 1078, 1085, 1089, 1090, 1105, 1117, 1121, 1123, 1136, 1138, 1140, 1146, 1155, 1158, 1164, 1173, 1178, 1188, 1189, 1195, 1198, 1210, 1214, 1218, 1231, 1235, 1236, 1244, 1245, 1253, 1268, 1272, 1274, 1275, 1305, 1306, 1309, 1311, 1314, 1317, 1318, 1331, 1340, 1342, 1343, 1344, 1367, 1370, 1371, 1374, 1379, 1381, 1382, 1385, 1393, 1398, 1403, 1414, 1425, 1445, 1457, 1460, 1461, 1469, 1481, 1483, 1496, 1497, 1499, 1500, 1503, 1510, 1511, 1513, 1535, 1539, 1545, 1547, 1559, 1560, 1597, 1599, 1600, 1605, 1609, 1612, 1614, 1620, 1623, 1628, 1633, 1635, 1637, 1662, 1667, 1674, 1682, 1693, 1698, 1702, 1707, 1708, 1713, 1728, 1729, 1730, 17...
3.該当のソースコード
python3 # common import pandas as pd import numpy as np from pandas import DataFrame, Series import matplotlib.pyplot as plt import seaborn as sns from sklearn.model_selection import train_test_split, GridSearchCV from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier # モデリング: lightgbm import lightgbm as lgb # optuna ! pip install optuna import optuna #データ読み込み等は、省略 #ホールドアウト検証 x_train, x_test, y_train, y_test,id_train,id_test= train_test_split(X, y, id, test_size=0.2, shuffle=True, stratify=y, random_state=123) print(x_train.shape, y_train.shape) print(x_test.shape, y_test.shape) print(id_train.shape, id_test.shape) '''プリント文のアウトプットは以下のもの (80000, 18) (80000, 1) (20000, 18) (20000, 1) (80000,) (20000,) ’’’ # 探索しないハイパーパラメータ params_base = { "boosting_type": "gbdt", "objective": "binary", "metric": "auc", "learning_rate": 0.02, 'n_estimators': 100000, "bagging_freq": 1, "seed": 123, } def objective(trial): # 探索するハイパーパラメータ params_tuning = { "num_leaves": trial.suggest_int("num_leaves", 8, 256), "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 5, 200), "min_sum_hessian_in_leaf": trial.suggest_float("min_sum_hessian_in_leaf", 1e-5, 1e-2, log=True), "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1.0), "bagging_fraction": trial.suggest_float("bagging_fraction", 0.5, 1.0), "lambda_l1": trial.suggest_float("lambda_l1", 1e-2, 1e2, log=True), "lambda_l2": trial.suggest_float("lambda_l2", 1e-2, 1e2, log=True), } params_tuning.update(params_base) # モデル学習・評価 list_metrics = [] cv = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=123).split(x_train, y_train)) for nfold in np.arange(5): idx_tr, idx_va = cv[nfold][0], cv[nfold][1] x_tr, y_tr = x_train.loc[idx_tr, :], y_train.loc[idx_tr, :] x_va, y_va = x_train.loc[idx_va, :], y_train.loc[idx_va, :] model = lgb.LGBMClassifier(**params_tuning) model.fit(x_tr, y_tr, eval_set=[(x_tr,y_tr), (x_va,y_va)], early_stopping_rounds=100, verbose=0, ) y_va_pred = model.predict_proba(x_va)[:,1] metric_va = accuracy_score(y_va, np.where(y_va_pred>=0.5, 1, 0)) list_metrics.append(metric_va) # 評価値の計算 metrics = np.mean(list_metrics) return metrics sampler = optuna.samplers.TPESampler(seed=123) study = optuna.create_study(sampler=sampler, direction="maximize") study.optimize(objective, n_trials=10)#この行で、エラーが発生
4.自分で調べたことや試したこと
Google検索では、有用な情報なし。optunaコードは以前は正常に動いたもの。
データ入力型式の問題か?
5.使っているツールのバージョンなど補足情報
開発環境:Google Colaboratory
プログラム言語:python3
OS:windows10 Home
CPU:Intel(R) Core(TM) i7-7500U CPU@2.70GHz 2.90GHz
回答1件
あなたの回答
tips
プレビュー