matplotlibで条件によって色分けをして凡例を追加したい

前提・実現したいこと

　機械学習手法のロジスティック回帰と機械学習ライブラリsklearnを使って、精度判定や影響度を出力しています。
matplotlibを使って、影響度が負の時は赤色、正の時は青色で棒グラフを表示するところまでできました。
更にやりたいことは、図3のグラフに「赤＝故障」「青＝正常」という凡例を追加することです。
解決方法があればご回答お願いします。

発生している問題

グラフが赤の時故障、青の時正常と凡例に追加したいが、うまくいっていない

該当のソースコード

python3
# Logistic-regression failure analysis script: loads train.csv, preprocesses
# it, fits a model, prints train/test accuracy, exports the coefficients to
# CSV and plots them as a colour-coded bar chart with a two-entry legend.
print("*** LogisticRegression.pyの実行 ***")
print("Step1. ライブラリのインポート")

# ***** Library imports *****
import warnings
# Hide superfluous warnings
warnings.filterwarnings('ignore')

import pickle
import os
import japanize_matplotlib  # presumably imported for Japanese font support in plots
import matplotlib.pyplot as plt
import numpy as np  # required by np.abs() below; was missing (NameError)
from matplotlib.patches import Patch  # proxy artists for the legend
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import csv
import pandas as pd


# ***** Load the input file *****
print("Step2. 入力ファイルの読み込み")
INPUT_FILE = os.path.join(os.getcwd(), "input_file")
OUTPUT_FILE = os.path.join(os.getcwd(), "output_file")
os.makedirs(OUTPUT_FILE, exist_ok=True)

# Failure-probability threshold at or above which a sample counts as a
# failure (not referenced anywhere else in this script).
THRESH = 0.3
df = pd.read_csv(os.path.join(INPUT_FILE, 'train.csv'))
df.head()  # Figure 1 (result is discarded when run as a plain script)

print("Step3. 入力データ前処理")
# Target column: encode failure as 0 and normal as 1
df['状態(予測対象)'] = df['状態(予測対象)'].map({'故障': 0, '正常': 1})
# Months since purchase: "131.0month" -> 131.0
df['購入からの経過月数'] = df['購入からの経過月数'].str.replace('month', '').astype(float)

# Machine type -> dummy variables (first category dropped)
m_type = pd.get_dummies(df['機器タイプ'], drop_first=True, prefix='機器')
# Maintenance team -> dummy variables (first category dropped)
team = pd.get_dummies(df['保守担当チーム'], drop_first=True, prefix='チーム')
# Replace the categorical columns with their dummy-encoded versions
df_tmp = df.drop(['機器タイプ', '保守担当チーム'], axis=1)
df_merge = pd.concat([df_tmp, m_type, team], axis=1)
df_merge.head()  # Figure 2 (result is discarded when run as a plain script)

col = ['機器_B', '機器_C', 'チーム_Team1-2', 'チーム_Team2-1', 'チーム_Team2-2', '購入からの経過月数', '稼働時平均温度',
       '稼働時平均湿度', '油圧メーター値']
x = df_merge[col]
t = df_merge['状態(予測対象)']
# Standardise the features (the scaler is fitted on the full data set
# before the train/test split below)
sc = StandardScaler()
new = sc.fit_transform(x)

# ***** Training *****
print("Step4. モデル学習")
x_train, x_test, y_train, y_test = train_test_split(new, t,
                                                    test_size=0.2, random_state=0)

# Fit the model
model = LogisticRegression(random_state=0, C=0.1,
                           multi_class="auto", solver="lbfgs")
model.fit(x_train, y_train)
print(f'訓練データ件数{len(y_train)} 検証データ件数{len(y_test)}')

# ***** Accuracy evaluation *****
print("Step5. 精度評価")
# Accuracy on the training and test splits
score = model.score(x_train, y_train)
score2 = model.score(x_test, y_test)
print(f"Train {score:.2%}")
print(f"Test {score2:.2%}")

# Export the per-feature influence (coefficients indexed by feature name)
coef = model.coef_[0]
feature = pd.DataFrame(coef, col)
feature.to_csv(os.path.join(OUTPUT_FILE, "feature.csv"), header=False, index=True, encoding="shift-jis")

# Plot the influence of each feature
plt.figure(figsize=(15, 5))
plt.title("故障の影響度")
plt.xlabel("要因")
plt.ylabel('故障/正常の影響度')
# Blue when the coefficient is non-negative, red when it is negative.
# The target is encoded 故障=0 / 正常=1, so a negative coefficient pushes the
# prediction toward 故障 and a positive one toward 正常.
color = ['r' if c < 0 else 'b' for c in coef]
# Bars show the absolute value; the sign is carried by the colour only.
plt.bar(col, np.abs(coef), width=0.5, color=color)  # Figure 3

# A single bar() call yields one container, so per-colour legend entries are
# built from proxy patches: exactly two entries regardless of the bar count.
legend_handles = [
    Patch(facecolor='r', label='故障'),
    Patch(facecolor='b', label='正常'),
]
plt.legend(handles=legend_handles, loc='upper center')
plt.show()

図1 入力ファイル

図2 カテゴリ変数をダミー変数化した

図3 影響度をグラフ出力したもの
ここから凡例を追加したい

試したこと

以下のコードで場合分けしてみたが、凡例が期待通りの動作にならなかった。
図4では、凡例が9個も出てしまっているが、凡例を赤と青の2つだけにしたい。

python3
# Attempted approach: draw the bars one at a time, each with its own label.
plt.figure(figsize=(15, 5))
for idx, weight in enumerate(model.coef_[0]):
    # Negative coefficients are drawn red / "故障", all others blue / "正常".
    bar_color, bar_label = ("r", "故障") if weight < 0 else ("b", "正常")
    # Every call passes label=, so the legend receives one entry per bar.
    plt.bar(col[idx], weight, width=0.5, color=bar_color, label=bar_label)
plt.legend(loc='upper left')
plt.show()  # Figure 4

図4 凡例失敗図

補足情報（FW/ツールのバージョンなど）

使用OS windows10
pythonヴァージョン python 3.9.7 32bit環境
使用ライブラリ
sklearn, matplotlib, pandas

参考
ロジスティック回帰分析で特徴量の重要度を知る方法
(https://teratail.com/questions/263204)

行動規範の内容に同意します

回答1件

ベストアンサー

データがないので適当な数値を入れています。

python
# Demo with made-up coefficients: draw positive and negative values as two
# separately labelled bar series so the legend has exactly two entries.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

matplotlib.rc('font', family='BIZ UDGothic')

col = ['機器_B', '機器_C', 'チーム_Team1-2', 'チーム_Team2-1', 'チーム_Team2-2', '購入からの経過月数', '稼働時平均温度',
       '稼働時平均湿度', '油圧メーター値']

coef = np.array([-0.06, -0.06, 0.2, 0.2, 0.28, -0.8, -0.31, -0.1, 0.7])

# Each series is zero wherever the other one is non-zero; the negative series
# holds the negated (i.e. positive-height) values.
positive_part = np.where(coef >= 0, coef, 0)
negative_part = np.where(coef < 0, -coef, 0)

plt.figure(figsize=(15, 5))
# (label, bar heights, bottom offset) for each series; the negative series is
# stacked on the positive one, which is zero at those positions.
series = (
    ('positive', positive_part, None),
    ('negative', negative_part, positive_part),
)
for series_label, heights, base in series:
    plt.bar(col, heights, width=0.5, label=series_label, bottom=base)
plt.legend()
plt.show()