下記、普通にロジスティック回帰をしていますが、なぜか以下のようなエラーがでるときとそうでないときがあります。調べてみると、sklearnのバグだという方もいましたが、そうだとは思い難いです。
なにかお気づきの点がありましたらご教示いただけませんでしょうか?
# Logistic regression fitting helper.
def _calc_logreg(X_train, y_train):
    """Fit a one-feature logistic regression and return its line parameters.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, 1)
        Training feature column.
    y_train : array-like, shape (n_samples,)
        Binary target labels.

    Returns
    -------
    (slope, intercept) : tuple of float
        Coefficient of the single feature and the model intercept.

    Raises
    ------
    ValueError
        If ``y_train`` contains fewer than two distinct classes.
    """
    # Every sklearn solver (including lbfgs) requires samples from at
    # least 2 classes. Upstream random sampling can occasionally produce
    # an all-one-class y_train, which is why the fit fails only sometimes.
    # Fail fast here with a message that points at the real cause.
    classes = set(y_train)
    if len(classes) < 2:
        raise ValueError(
            "y_train must contain at least 2 classes; got only %r"
            % sorted(classes)
        )
    logreg = LogisticRegression(solver='lbfgs')
    logreg.fit(X_train, y_train)
    slope = logreg.coef_[0, 0]
    intercept = logreg.intercept_[0]
    return slope, intercept
テストコードとエラー画面
def test_make_params(): graph_id = 'Test' ability = ["Z1", "Z2", "Z3", "A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3", "DD1", "DD2", "DD3"] lists = [['Question'+str(random.randint(1, 10)), random.choice(ability), random.randint(0, 1)] for j in range(100)] two_arrays = np.array(lists) > assert len(make_params(graph_id, two_arrays)) == 10 tests/test_estimater.py:15: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=1... n_jobs=None, penalty='l2', random_state=None, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False) X = array([[ 1.9], [-1.3], [ 2.8], [ 2.3], [ 1.5], [-0.4], [ 0.8], [-0.4], [ 0. ], [ 1.9], [ 2.3], [-1.7], [ 0.4]]) y = array(['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1'], dtype='<U1') sample_weight = None def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples,) Target vector relative to X. sample_weight : array-like, shape (n_samples,) optional Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.17 *sample_weight* support to LogisticRegression. 
Returns ------- self : object """ if not isinstance(self.C, numbers.Number) or self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" " got (max_iter=%r)" % self.max_iter) if not isinstance(self.tol, numbers.Number) or self.tol < 0: raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) solver = _check_solver(self.solver, self.penalty, self.dual) if solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C", accept_large_sparse=solver != 'liblinear') check_classification_targets(y) self.classes_ = np.unique(y) n_samples, n_features = X.shape multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_)) if solver == 'liblinear': if effective_n_jobs(self.n_jobs) != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. 
Got 'n_jobs'" " = {}.".format(effective_n_jobs(self.n_jobs))) self.coef_, self.intercept_, n_iter_ = _fit_liblinear( X, y, self.C, self.fit_intercept, self.intercept_scaling, self.class_weight, self.penalty, self.dual, self.verbose, self.max_iter, self.tol, self.random_state, sample_weight=sample_weight) self.n_iter_ = np.array([n_iter_]) return self if solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None n_classes = len(self.classes_) classes_ = self.classes_ if n_classes < 2: raise ValueError("This solver needs samples of at least 2 classes" " in the data, but the data contains only one" > " class: %r" % classes_[0]) E ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: '1' venv/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:1319: ValueError ==================== 1 failed, 163 passed in 41.87 seconds =====================
回答1件
あなたの回答
tips
プレビュー