python3 sklearnでk近傍法を用いた際の予測結果確認方法(機械学習に詳しい方お願いします)

###前提・実現したいこと
python3.6でk近傍法を用いた予測モデルを作っています。
自分で作ったcsvファイルのresultの列が0と1のデータで、
これを予測させるようにプログラミングしたのですが、
k近傍法を用いた際に、scoreだけじゃなくて、
1と予測したデータが0だった割合や、0と予測したデータが1だった割合なども知りたいです。
ご教示のほど、よろしくお願いします。
csvファイルは
https://dotup.org/uploda/dotup.org1766434.csv.html
にアップロードしました。

###該当のソースコード

#!/usr/bin/python3
# -*- coding: utf-8 -*-
from pprint import pprint  #きれいに表示してくれる
import csv  #csvファイルのライブラリ
import numpy as np
import pandas as pd
import sklearn.model_selection as MS
from sklearn import preprocessing
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier as KNC
# Load the dataset and show its first rows as a quick sanity check.
df = pd.read_csv('kaisekidata.csv', engine='python')
print(df.head())

# Hold out 20% of the rows for testing. train_test_split returns a
# (train, test) pair for each array in the order the arrays are passed,
# so the labels ('result') come first here, followed by the features.
train_y, test_y, train_x, test_x = MS.train_test_split(
    df['result'], df.drop('result', axis=1), test_size=0.2, train_size=0.8)

# Train a k-nearest-neighbours classifier with k = 3.
clf = KNC(n_neighbors=3)
clf.fit(train_x, train_y)

# Score of the fitted classifier on the held-out test data.
print(clf.score(test_x, test_y))

# Summary of the dataset and of the split sizes.
print('全データの数: %d' % len(df))
print('訓練データの数: %d' % len(train_y))
print('テストデータの数: %d' % len(test_y))
# Every column except 'result' is used as a feature.
print('特徴量(説明変数)の数: %d' % (df.shape[1]-1))

###補足情報(言語/FW/ツール等のバージョンなど)
cloud9のpython3.6を使って作っています。

行動規範の内容に同意します

回答1件

ベストアンサー

1と予測したデータが0だった割合や、0と予測したデータが1だった割合なども知りたいです

混同行列 (confusion matrix) を作成すればよいと思います。
混同行列についてご存知ない場合は、こちらに見方が書いてあります。
sklearn では、 from sklearn.metrics import confusion_matrix で計算できます。

python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

sns.set()

df = pd.read_csv('data.csv')

# Split into training data and test data (20% is held out for testing).
train_x, test_x, train_y, test_y = train_test_split(
    df.drop('result', axis=1), df['result'], test_size=0.2)

clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(train_x, train_y)

# Predict the labels of the test data.
pred_y = clf.predict(test_x)

# Class values in sorted order. They are passed to confusion_matrix
# explicitly so that the row/column order of the matrix is guaranteed to
# match the tick labels built below, and so that the matrix keeps its full
# shape even if some class does not occur in the test split.
classes = np.sort(df['result'].unique())

# Build the confusion matrix (rows: true label, columns: predicted label).
cm = confusion_matrix(test_y, pred_y, labels=classes)


# Draw the confusion matrix.
def print_confusion_matrix(confusion_matrix, class_names):
    """Show a confusion matrix of counts as an annotated heatmap.

    Args:
        confusion_matrix: 2-D array of integer counts. NOTE: inside this
            function the parameter name shadows the imported sklearn
            function of the same name.
        class_names: Tick labels, used for both axes, in the same order
            as the rows/columns of the matrix.
    """
    sns.heatmap(
        confusion_matrix, xticklabels=class_names, yticklabels=class_names,
        annot=True, fmt='d', cbar=False, square=True)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()


labels = ['class {}'.format(i) for i in classes]
print_confusion_matrix(cm, labels)

件数ではなく、割合で混同行列を表示したい場合

python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

sns.set()

df = pd.read_csv('data.csv')

# Split into training data and test data (20% is held out for testing).
train_x, test_x, train_y, test_y = train_test_split(
    df.drop('result', axis=1), df['result'], test_size=0.2)

clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(train_x, train_y)

# Predict the labels of the test data.
pred_y = clf.predict(test_x)

# Class values in sorted order. They are passed to confusion_matrix
# explicitly so that the row/column order of the matrix is guaranteed to
# match the tick labels built below, and so that the matrix keeps its full
# shape even if some class does not occur in the test split.
classes = np.sort(df['result'].unique())

# Build the confusion matrix (rows: true label, columns: predicted label).
cm = confusion_matrix(test_y, pred_y, labels=classes)
# Convert counts to proportions: every cell is divided by the total number
# of test samples, so all cells together sum to 1.
cm = cm / cm.sum()


# Draw the confusion matrix.
def print_confusion_matrix(confusion_matrix, class_names):
    """Show a confusion matrix of proportions as an annotated heatmap.

    Args:
        confusion_matrix: 2-D array of fractions; each cell is rendered as
            a percentage with two decimals. NOTE: inside this function the
            parameter name shadows the imported sklearn function of the
            same name.
        class_names: Tick labels, used for both axes, in the same order
            as the rows/columns of the matrix.
    """
    sns.heatmap(
        confusion_matrix, xticklabels=class_names, yticklabels=class_names,
        annot=True, fmt='.2%', cbar=False, square=True, cmap='YlGnBu')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()


labels = ['class {}'.format(i) for i in classes]
print_confusion_matrix(cm, labels)