前提・実現したいこと
不均衡データを3つのクラスに分類するモデルについて、グリッドサーチでパラメータ探索を行いたいのですが、
下記のエラーが発生して上手くいきません。
発生している問題・エラーメッセージ
ValueError Traceback (most recent call last) <ipython-input-15-32a008acd626> in <module> 41 grid_search = GridSearchCV(LinearSVC(class_weight="balanced"), param_grid, cv = 4) 42 ---> 43 grid_search.fit(X_train_std, y_train) 44 45 # 結果 ~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params) 686 return results 687 --> 688 self._run_search(evaluate_candidates) 689 690 # For multi-metric evaluation, store the best_index_, best_params_ and ~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates) 1147 def _run_search(self, evaluate_candidates): 1148 """Search all candidates in param_grid""" -> 1149 evaluate_candidates(ParameterGrid(self.param_grid)) 1150 1151 ~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params) 665 for parameters, (train, test) 666 in product(candidate_params, --> 667 cv.split(X, y, groups))) 668 669 if len(out) < 1: ~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable) 919 # remaining jobs. 
920 self._iterating = False --> 921 if self.dispatch_one_batch(iterator): 922 self._iterating = self._original_iterator is not None 923 ~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator) 757 return False 758 else: --> 759 self._dispatch(tasks) 760 return True 761 ~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch) 714 with self._lock: 715 job_idx = len(self._jobs) --> 716 job = self._backend.apply_async(batch, callback=cb) 717 # A job can complete so quickly than its callback is 718 # called before we get here, causing self._jobs to ~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback) 180 def apply_async(self, func, callback=None): 181 """Schedule a func to be run""" --> 182 result = ImmediateResult(func) 183 if callback: 184 callback(result) ~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch) 547 # Don't delay the application, to avoid keeping the input 548 # arguments in memory --> 549 self.results = batch() 550 551 def get(self): ~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self) 223 with parallel_backend(self._backend, n_jobs=self._n_jobs): 224 return [func(*args, **kwargs) --> 225 for func, args, kwargs in self.items] 226 227 def __len__(self): ~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0) 223 with parallel_backend(self._backend, n_jobs=self._n_jobs): 224 return [func(*args, **kwargs) --> 225 for func, args, kwargs in self.items] 226 227 def __len__(self): ~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score) 501 train_scores = {} 502 if parameters is not None: --> 503 estimator.set_params(**parameters) 504 505 start_time = time.time() ~\Anaconda3\lib\site-packages\sklearn\base.py in 
set_params(self, **params)
    222 'Check the list of available parameters '
    223 'with `estimator.get_params().keys()`.' %
--> 224 (key, self))
    225
    226 if delim:

ValueError: Invalid parameter gamma for estimator LinearSVC(C=0.001, class_weight='balanced', dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, multi_class='ovr', penalty='l2', random_state=None, tol=0.0001, verbose=0). Check the list of available parameters with `estimator.get_params().keys()`.

ソースコード
```Python
# データ加工・処理・分析ライブラリ
import numpy as np
import numpy.random as random
import scipy as sp
from pandas import Series, DataFrame
import pandas as pd
# 可視化ライブラリ
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
%matplotlib inline
%precision 3
# 機械学習ライブラリ
import sklearn
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
# Load the labelled acceleration measurements (the CSV is Shift-JIS encoded).
data = pd.read_csv("2017現場加速度計測_教師データまとめ.csv", encoding="shift_jis")
# DataFrame.assign returns a new frame instead of modifying in place, so the
# result must be bound back to `data`; otherwise the numeric conversion of the
# label column is silently thrown away.
data = data.assign(生産性分類=pd.to_numeric(data.生産性分類))
# Select the feature columns and the class label, then split them into
# training and test sets. The split is stratified on the label and uses a
# fixed random_state.
X = data[
    [
        "km",
        "平均合成加速度",
        "sx",
        "sy",
        "sz",
        "mean_x",
        "mean_y",
        "mean_z",
    ]
]
y = data["生産性分類"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=0,
)
# Standardization: the scaler is fitted on the training features only and the
# same fitted scaler is then applied to both the training and test features.
sc = StandardScaler().fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
# Training: linear SVM classification tuned with a grid search.
# LinearSVC has no `gamma` parameter (it is not among the parameters the
# estimator reports via get_params()), so putting "gamma" in the grid makes
# GridSearchCV fail with "Invalid parameter gamma for estimator LinearSVC".
# Only the regularization strength C is searched here.
param_grid = {"C": [0.001, 0.01, 0.1, 1, 10, 100]}
# class_weight="balanced" is kept from the original to account for the
# imbalanced classes; cv=4 requests 4 cross-validation splits.
grid_search = GridSearchCV(LinearSVC(class_weight="balanced"), param_grid, cv=4)
grid_search.fit(X_train_std, y_train)
# Results
print("Test set score: {:.3f}".format(grid_search.score(X_test_std, y_test)))
# best_params_ is a dict, so the float format spec ".3f" would raise
# TypeError; print it with the default formatting instead.
print("Best parameters: {}".format(grid_search.best_params_))
print("Best cross-validation score: {:.3f}".format(grid_search.best_score_))
回答1件
あなたの回答
tips
プレビュー