ValueError: continuous format is not supportedの対処方法

1.前提・実現したいこと
不均衡問題を異常検知問題として扱う方法（https://qiita.com/tk-tatsuro/items/10e9dbb3f2cf030e2119）を読み、コードを動かしています。
ValueError: continuous format is not supportedというエラーが出ます。
対処方法をお願いします。

2.発生している問題・エラーメッセージ
３．のソースコード実行（最終行のコード）で下記のエラーメッセージが発生
-> ValueError Traceback (most recent call last)
<ipython-input-8-8f5107fe8148> in <module>()
89
90 # ROC曲線_エラー発生
---> 91 plot_roc_curve(y_pred,y_test)
92
93 # Precision-Recall曲線

2 frames
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_ranking.py in _binary_clf_curve(y_true, y_score, pos_label, sample_weight)
729 y_type = type_of_target(y_true)
730 if not (y_type == "binary" or (y_type == "multiclass" and pos_label is not None)):
--> 731 raise ValueError("{0} format is not supported".format(y_type))
732
733 check_consistent_length(y_true, y_score, sample_weight)

ValueError: continuous format is not supported

3.該当のソースコード

python3
1import pandas as pd
2import seaborn as sns
3import matplotlib.pyplot as plt
4%matplotlib inline
5
6# データ取得
7creditcard = pd.read_csv("×××（内部ディレクトリ名）/creditcard.csv", sep=',')
8
9import numpy as np
10from sklearn import metrics
11from sklearn.metrics import accuracy_score
12from sklearn.metrics import precision_score
13from sklearn.metrics import recall_score
14from sklearn.metrics import f1_score
15from sklearn.model_selection import train_test_split
16from sklearn.linear_model import LogisticRegression
17from sklearn.metrics import confusion_matrix
18from sklearn.metrics import plot_confusion_matrix
19from sklearn.metrics import roc_auc_score # ROC曲線
20
21# アンダーサンプリング
22from imblearn.under_sampling import RandomUnderSampler
23
24target = 'Class'
25rs = RandomUnderSampler(random_state=42)
26under_sampling ,_ = rs.fit_resample(creditcard, creditcard[target])
27
28print('*'*20)
29print('＜元のデータ＞')
30print('0の件数：%d'%len(creditcard.query(f'{target}==0')))
31print('1の件数：%d'%len(creditcard.query(f'{target}==1')))
32print('*'*20)
33print('＜アンダーサンプリング後のデータ＞')
34print('0の件数：%d'%len(under_sampling.query(f'{target}==0')))
35print('1の件数：%d'%len(under_sampling.query(f'{target}==1')));
36
37def show_confusion_matrix(y_test, y_pred):
38    print("\n", confusion_matrix(y_test, y_pred))
39
40def modeling(data):
41   X = data.drop('Class',axis=1)
42   y = data['Class']
43
44   # データ分割
45   X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.1, random_state=42)
46
47   # ロジスティック回帰
48   lr = LogisticRegression(max_iter=1000)
49   lr.fit(X_train, y_train)
50   y_pred = lr.predict(X_test)
51
52   # 混同行列
53   matrix = show_confusion_matrix(y_test, y_pred)
54   print(matrix)
55
56   # 評価
57   print('Accuracy = ', accuracy_score(y_true=y_test, y_pred=y_pred).round(decimals=3))
58   print('Precision = ', precision_score(y_true=y_test, y_pred=y_pred).round(decimals=3))
59   print('Recall = ', recall_score(y_true=y_test, y_pred=y_pred).round(decimals=3))
60   print('F1 score = ', f1_score(y_true=y_test, y_pred=y_pred).round(decimals=3))
61
62   return lr, y_pred, X_train, X_test, y_train, y_test
63
64# ROC曲線
65def plot_roc_curve(pred, y_test):
66   pred =  np.where(pred > 0.5, 1, 0.5)
67   fpr, tpr, thresholds = metrics.roc_curve(pred, y_test)
68   plt.figure(figsize=[15, 5])
69   plt.plot(fpr, tpr, label=f'roc_curve')
70   plt.xlabel('FPR')
71   plt.ylabel('TPR')
72   plt.legend()
73   plt.grid()
74   plt.show()
75
76   auc = metrics.auc(fpr, tpr)
77   print('auc: {:.3f}'.format(auc))
78
79# PR（Precision-Recall）曲線
80def plot_precision_recall_curve(y_test, y_pred):
81   precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred)
82
83   auc = metrics.auc(recall, precision)
84   print(f'AUC： {round(auc,3)}%')
85
86   plt.figure(figsize=[15, 5])
87   plt.plot(recall, precision, label='PR curve (area = %.2f)'%auc)
88   plt.legend()
89   plt.title('PR curve')
90   plt.xlabel('Recall')
91   plt.ylabel('Precision')
92   plt.grid()
93   plt.show();
94
95# アンダーサンプリング
96lr, y_pred, X_train, X_test, y_train, y_test = modeling(under_sampling)
97
98# ROC曲線_エラー発生
99plot_roc_curve(y_pred,y_test)
100
101# Precision-Recall曲線
102plot_precision_recall_curve(y_test, y_pred)

4.自分で調べたことや試したこと
Google検索では、有用な情報なし。

5.使っているツールのバージョンなど補足情報
開発環境：Google Colaboratory
プログラム言語：python3
OS：windows10 Home
CPU：Intel(R) Core(TM) i7-7500U CPU@2.70GHz 2.90GHz

行動規範の内容に同意します

回答2件

自己解決

def plot_roc_curveのところで、第１引数を予測値にしたのが原因でした。
(y_test, y_pred)で、正常にプロットされました。

投稿2022/08/10 08:02

kouji_39

総合スコア164

roc_curve関数に渡しているpredの型(type_of_target)がcontinuousなので提示エラーが発生しています。
動作未検証ですが、提示コードでのpred = np.where(pred > 0.5, 1, 0.5)の部分は、参考URLではpred = np.where(predict > 0.5, 1, 0)となっているので、そのように修正すれば（binaryと解釈されて）動作するものと思われます。

投稿2022/08/10 01:26

編集2022/08/10 01:27