scikit-learnのRandomizedSearchCVエラー

Question

```ここに言語を入力 import numpy as np from sklearn import datasets from sklearn.model_selection import GridSearchCV from sklearn.linear_model import LogisticRegression from sklearn.decomposition import PCA from sklearn.svm import SVC from sklearn.pipeline import Pipeline digits = datasets.load_digits() X,y=digits.data,digits.target from sklearn.model_selection import train_test_split X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0) clf1=LogisticRegression() clf2=SVC() estimators = [('pca', PCA()), ('clf', clf1)] pipe1 = Pipeline(estimators) param1 = {'clf__C':[1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10], 'pca__whiten':[True,False], } gs = GridSearchCV(pipe1, param1) gs.fit(X_train, y_train) gs.score(X_test, y_test) from sklearn.model_selection import RandomizedSearchCV estimators= [('pca', PCA()), ('clf',SVC())] pipe2 = Pipeline(estimators) gamma_range_exp = np.arange(-10.0, 0.0, 3) gamma_range = 10 ** gamma_range_exp param2 =[ {'clf__C':[1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10], 'clf__kernel':['linear'], 'pca__whiten':[True,False], 'pca__n_components': [30, 20, 10]}, {'clf__C':[1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10], 'clf__kernel':['rbf'], 'gamma': gamma_range, 'pca__whiten':[True,False], 'pca__n_components': [30, 20, 10]} ] gs = RandomizedSearchCV(pipe2, param2, n_jobs=-1, verbose=2) gs.fit(X_train, y_train) ``` エラー内容 AttributeError Traceback (most recent call last) in () 12 13 gs = RandomizedSearchCV(pipe2, param2, n_jobs=-1, verbose=2) ---> 14 gs.fit(X_train, y_train) ~/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params) 616 n_splits = cv.get_n_splits(X, y, groups) 617 # Regenerate parameter iterable for each fit --> 618 candidate_params = list(self._get_param_iterator()) 619 n_candidates = len(candidate_params) 620 if self.verbose > 0: ~/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py in __iter__(self) 236 # in this case we want to sample without replacement 237 
all_lists = np.all([not hasattr(v, "rvs") --> 238 for v in self.param_distributions.values()]) 239 rnd = check_random_state(self.random_state) 240 AttributeError: 'list' object has no attribute 'values' ![イメージ説明](168d763492519882668efddcde7ef8a6.png) ![イメージ説明](b3d7e58c75630d2ab06e556e36dc980d.png) ![イメージ説明](5b166249ca02f7a0fc561b5b3c3423d6.png) ![イメージ説明](47f8035c0822aaf4a7e8e004d800bed0.png) ![イメージ説明](ba0f9298612d711f93e24972c6adaa40.png) 1のようにグリッドサーチをしたかったのですが、エラーが出てしまいました。何がちがうのでしょうか？

Accepted Answer

umyuさんのfixで問題なく動かせました。

python: 3.6.3
numpy: 1.14.2
sklearn: 0.19.1

```python
import numpy as np
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Everything executable lives behind the __main__ guard: the randomized
# search below runs with n_jobs=-1, and joblib refuses to do parallel
# computing from an unprotected main module on systems that do not support
# forking. The names stay module-level, so they can still be inspected
# after the run.
if __name__ == '__main__':
    digits = datasets.load_digits()
    X, y = digits.data, digits.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    # --- 1) Exhaustive grid search: PCA -> LogisticRegression ---
    clf1 = LogisticRegression()
    estimators = [('pca', PCA()),
                  ('clf', clf1)]
    pipe1 = Pipeline(estimators)

    # Keys use the '<step name>__<parameter>' form to address pipeline steps.
    param1 = {'clf__C': [1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10],
              'pca__whiten': [True, False]}

    gs = GridSearchCV(pipe1, param1)
    gs.fit(X_train, y_train)

    print(gs.score(X_test, y_test))

    # --- 2) Randomized search: PCA -> SVC ---
    estimators = [('pca', PCA()),
                  ('clf', SVC())]
    pipe2 = Pipeline(estimators)

    # Exponents -10, -7, -4, -1 -> gamma candidates 1e-10 ... 1e-1.
    gamma_range_exp = np.arange(-10.0, 0.0, 3)
    gamma_range = 10 ** gamma_range_exp

    # RandomizedSearchCV needs a single dict here: passing a list of dicts
    # raised "AttributeError: 'list' object has no attribute 'values'".
    param2 = {'clf__C': [1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10],
              'clf__kernel': ['rbf', 'linear'],
              'clf__gamma': gamma_range,
              'pca__whiten': [True, False],
              'pca__n_components': [30, 20, 10]}

    print('start')
    gs = RandomizedSearchCV(pipe2, param2, n_jobs=-1, verbose=2)
    gs.fit(X_train, y_train)
```

Answer

```Python
AttributeError: 'list' object has no attribute 'values'
```
param2 がlistになっているので、[]を外して実行しても同じエラーが発生しますか？

```Python
# RandomizedSearchCV (scikit-learn 0.19) calls `.values()` on the parameter
# argument, so it must be ONE dict. Two dicts separated by a comma are still
# not a dict once the surrounding brackets are dropped, so the 'linear' and
# 'rbf' settings are merged into a single dict here.
param2 = {
    'clf__C': [1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10],
    'clf__kernel': ['rbf', 'linear'],
    # Pipeline step parameters need the '<step name>__' prefix, so the key is
    # 'clf__gamma', not 'gamma'. gamma is a kernel coefficient that only
    # matters for the 'rbf' candidates.
    'clf__gamma': gamma_range,
    'pca__whiten': [True, False],
    'pca__n_components': [30, 20, 10],
}
              
```
追記
[VotingClassifierを使いつつGridSearchCV/RandomizedSearchCVでパラメータチューニング](https://qiita.com/yagays/items/a503117bd06bb938fdb9)

---

```Python
# Single search space for the PCA -> SVC pipeline; every key addresses a
# pipeline step via the '<step name>__<parameter>' naming.
param2 = {
    'clf__C': [1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10],
    'clf__kernel': ['rbf', 'linear'],
    'clf__gamma': gamma_range,
    'pca__whiten': [True, False],
    'pca__n_components': [30, 20, 10],
}

```
◇実行環境
python: 3.6.5
numpy: 1.13.3
sklearn: 0.19.1
OS: Windows 10
PyCharmより実行。
```txt
ImportError: [joblib] Attempting to do parallel computing without protecting your import on a system that does not support forking. To use parallel-computing in a script, you must protect your main loop using "if __name__ == '__main__'". Please see the joblib documentation on Parallel for more information
```
joblibはどこに・・・

---

mkgreiさんに教えて頂いた[リンク](https://stackoverflow.com/questions/40803684/parallel-error-with-gridsearchcv-works-fine-with-other-methods)を参考に質問文のコードを改造したところ、
Windows環境でもエラーが発生しないようにできました。

```Python
import numpy as np
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV


def main() -> None:
    """Tune two PCA-based pipelines on the digits dataset.

    Splits the digits data 80/20 (fixed seed), then:

    1. runs a GridSearchCV over a PCA -> LogisticRegression pipeline and
       prints its score on the held-out test split;
    2. prints ``'start'`` and fits a RandomizedSearchCV over a PCA -> SVC
       pipeline with ``n_jobs=-1`` and ``verbose=2``.

    Returns nothing; results are only printed.
    """
    digits = datasets.load_digits()
    X, y = digits.data, digits.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    # Regularisation strengths shared by both searches.
    c_values = [1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10]

    # --- 1) Exhaustive grid search: PCA -> LogisticRegression ---
    logreg_pipeline = Pipeline([('pca', PCA()),
                                ('clf', LogisticRegression())])
    # Keys use the '<step name>__<parameter>' form to address pipeline steps.
    logreg_grid = {'clf__C': c_values,
                   'pca__whiten': [True, False]}
    grid_search = GridSearchCV(logreg_pipeline, logreg_grid)
    grid_search.fit(X_train, y_train)
    print(grid_search.score(X_test, y_test))

    # --- 2) Randomized search: PCA -> SVC ---
    svc_pipeline = Pipeline([('pca', PCA()),
                             ('clf', SVC())])
    # Exponents -10, -7, -4, -1 -> gamma candidates 1e-10 ... 1e-1.
    gamma_range = 10 ** np.arange(-10.0, 0.0, 3)

    # Must be a single dict (not a list of dicts) for RandomizedSearchCV in
    # the scikit-learn version this was written for.
    svc_space = {'clf__C': c_values,
                 'clf__kernel': ['rbf', 'linear'],
                 'clf__gamma': gamma_range,
                 'pca__whiten': [True, False],
                 'pca__n_components': [30, 20, 10]}

    print('start')
    random_search = RandomizedSearchCV(svc_pipeline, svc_space,
                                       n_jobs=-1, verbose=2)
    random_search.fit(X_train, y_train)


# Entry-point guard: joblib requires the main loop to be protected like this
# when doing parallel computing on systems that do not support forking.
if __name__ == "__main__":
    main()

```