ニューラルネットワークの分類問題

ゼロから作るDeep Learning（オライリー・ジャパン）のchapter4 Mnistのクラス分類のニューラルネットワークを改良して、scikit-learn のワインデータを分類しようとしましたが、学習がすすみません。モデルや、逆伝播のロジックは本のままで、投入データの部分を改良しています。どこが間違っているのでしょうか。
学習後の重みパラメータ"W2"は、ありえない値になっしまいます。

Python
1import numpy as np
2import matplotlib.pyplot as plt
3import pandas as pd
4from sklearn.model_selection import train_test_split
5from sklearn import linear_model, datasets
6
7def sigmoid(x):
8    return 1 / (1 + np.exp(-x))    
9
10def sigmoid_grad(x):
11    return (1.0 - sigmoid(x)) * sigmoid(x)
12
13def softmax(x):
14    if x.ndim == 2:
15        x = x.T
16        x = x - np.max(x, axis=0)
17        y = np.exp(x) / np.sum(np.exp(x), axis=0)
18        return y.T 
19
20    x = x - np.max(x) 
21    return np.exp(x) / np.sum(np.exp(x))
22
23def cross_entropy_error(y, t):
24    if y.ndim == 1:
25        t = t.reshape(1, t.size)
26        y = y.reshape(1, y.size)
27        
28    # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換
29    if t.size == y.size:
30        t = t.argmax(axis=1)
31             
32    batch_size = y.shape[0]
33    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
34
35class TwoLayerNet:
36
37    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
38        # 重みの初期化
39        self.params = {}
40        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
41        self.params['b1'] = np.zeros(hidden_size)
42        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
43        self.params['b2'] = np.zeros(output_size)
44
45    def predict(self, x):
46        W1, W2 = self.params['W1'], self.params['W2']
47        b1, b2 = self.params['b1'], self.params['b2']
48    
49        a1 = np.dot(x, W1) + b1
50        z1 = sigmoid(a1)
51        a2 = np.dot(z1, W2) + b2
52        y = softmax(a2)
53        
54        return y
55        
56    # x:入力データ, t:教師データ
57    def loss(self, x, t):
58        y = self.predict(x)
59        
60        return cross_entropy_error(y, t)
61    
62    def accuracy(self, x, t):
63        y = self.predict(x)
64        y = np.argmax(y, axis=1)
65        t = np.argmax(t, axis=1)
66        
67        accuracy = np.sum(y == t) / float(x.shape[0])
68        return accuracy
69        
70    # x:入力データ, t:教師データ
71    def gradient(self, x, t):
72        W1, W2 = self.params['W1'], self.params['W2']
73        b1, b2 = self.params['b1'], self.params['b2']
74        grads = {}
75        
76        batch_num = x.shape[0]
77        
78        # forward
79        a1 = np.dot(x, W1) + b1; #print("a1",end=":");print(a1)
80        z1 = sigmoid(a1); #print("z1",end=":");#print(z1)
81        a2 = np.dot(z1, W2) + b2;#print("a2",end=":");print(a2)
82        y = softmax(a2);#print("y",end=":");print(y)
83        
84        # backward
85        dy = (y - t) / batch_num; 
86        grads['W2'] = np.dot(z1.T, dy); #print("np.dot(z1.T, dy): " + str(grads['W2']))
87        grads['b2'] = np.sum(dy, axis=0); #print("np.sum(dy, axis=0)" + str(grads['b2']))
88        
89        da1 = np.dot(dy, W2.T); #print("np.dot(dy, W2.T)" + str(da1))
90        dz1 = sigmoid_grad(a1) * da1 ; #print("sigmoid_grad(a1) * da1" + str(dz1))
91        grads['W1'] = np.dot(x.T, dz1) ; #print("np.dot(x.T, dz1)" + str(grads['W1']))
92        grads['b1'] = np.sum(dz1, axis=0) ; #print("np.sum(dz1, axis=0)" + str(grads['b1']))
93        return grads
94
95# データの読み込み
96load_data = datasets.load_wine()
97features = pd.DataFrame(data=load_data.data, columns=load_data.feature_names)
98targets = pd.DataFrame(data=load_data.target, columns=['class'])
99x_train, x_test, t_train, t_test = train_test_split(features, targets, test_size=0.1)
100
101network = TwoLayerNet(input_size=13, hidden_size=100, output_size=4)
102
103iters_num = 1000  # 繰り返しの回数を適宜設定する
104train_size = x_train.shape[0]
105batch_size = 8
106learning_rate = 0.01
107
108train_loss_list = []
109train_acc_list = []
110test_acc_list = []
111
112iter_per_epoch = max(train_size / batch_size, 1)
113
114cnt = 0
115for i in range(iters_num):
116    cnt+=1
117    batch_mask = np.random.choice(train_size, batch_size)
118    x_batch = x_train.values[batch_mask]
119    t_batch = t_train.values[batch_mask]
120
121    # 勾配の計算
122    grad = network.gradient(x_batch, t_batch)
123    
124    # パラメータの更新
125    for key in ('W1', 'b1', 'W2', 'b2'):
126        network.params[key] -= learning_rate * grad[key]
127    
128    loss = network.loss(x_batch, t_batch);#print("loss: " + str(loss))
129    train_loss_list.append(loss)
130    print(loss)
131
132    if i % iter_per_epoch == 0 and True:
133        train_acc = network.accuracy(x_train.values, t_train.values)
134        test_acc = network.accuracy(x_test.values, t_test.values)
135        train_acc_list.append(train_acc)
136        test_acc_list.append(test_acc)
137        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
138print("W1")
139print(network.params["W1"])
140print("b1")
141print(network.params["b1"])
142print("W2")
143print(network.params["W2"])
144print("b2")
145print(network.params["b2"])
146
147# グラフの描画
148markers = {'train': 'o', 'test': 's'}
149x = np.arange(len(train_acc_list))
150plt.plot(x, train_acc_list, label='train acc')
151plt.plot(x, test_acc_list, label='test acc', linestyle='--')
152plt.xlabel("epochs")
153plt.ylabel("accuracy")
154plt.ylim(0, 1.0)
155plt.legend(loc='lower right')
156plt.show()
157
158

行動規範の内容に同意します

回答1件

自己解決

データ読み込み後、正規化と、One-hot-vector処理をすると、うまくいきました。

Python
1for each in features.columns:
2    mean, std = features[each].mean(), features[each].std()
3    features.loc[:, each] = (features[each] - mean)/std

Python
1targets = pd.get_dummies(targets['class'], prefix='class')

投稿2019/04/01 11:30

teefpc

総合スコア111

あなたの回答

tips

プレビュー

行動規範の内容に同意します

質問の解決につながる回答をしましょう。サンプルコードなど、より具体的な説明があると質問者の理解の助けになります。また、読む側のことを考えた、分かりやすい文章を心がけましょう。

15分調べてもわからないことは
teratailで質問しよう！

ただいまの回答率
85.48%

質問をまとめることで
思考を整理して素早く解決

テンプレート機能で
簡単に質問をまとめる

質問する

質問をすることでしか得られない、回答やアドバイスがある。

15分調べてもわからないことは、質問しよう！

ニューラルネットワークの分類問題

関連した質問