# Question edit history

## Revision 3: Attached an image of the training results
Title:

```diff
@@ -1 +1 @@
-Learning y=x^2 in Python
+Training on y=x^2 does not progress
```
Body:

````diff
@@ -9,7 +9,9 @@
 ### Problem / error message
 
 When I plot the training results, the values are far off from y=x^2.
-
+
+[image: plot of the training results]
+
 ### Relevant source code
 
 ```python
````
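For a point of comparison with "training does not progress", below is a minimal self-contained reference sketch of my own (the 1-32-1 architecture, tanh activation, seed, learning rate, and iteration count are all assumptions, not taken from the question). A small network with a linear output layer, trained by plain full-batch gradient descent on mean squared error, does fit y=x^2 on data generated the same way:

```python
# A minimal reference MLP for regression on y = x^2 (assumed setup:
# 1 -> 32 -> 1, tanh hidden layer, linear output, full-batch gradient descent).
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1, 1, size=(100, 1))
t = x ** 2

W1 = rng.normal(0.0, 0.5, size=(1, 32)); b1 = np.zeros(32)
W2 = rng.normal(0.0, 0.5, size=(32, 1)); b2 = np.zeros(1)
lr = 0.1

for i in range(5000):
    # forward: the output layer is linear, so no activation on the last layer
    a1 = x @ W1 + b1
    z1 = np.tanh(a1)
    y = z1 @ W2 + b2

    # backward for L = 0.5 * mean((y - t)^2); each derivative applied once
    dy = (y - t) / x.shape[0]
    dW2 = z1.T @ dy
    db2 = dy.sum(axis=0)
    dz1 = dy @ W2.T
    da1 = dz1 * (1.0 - z1 ** 2)  # tanh'(a1) = 1 - tanh(a1)^2
    dW1 = x.T @ da1
    db1 = da1.sum(axis=0)

    W1 -= lr * dW1; b1 -= lr * db1
    W2 -= lr * dW2; b2 -= lr * db2

print(float(np.mean((y - t) ** 2)))  # should end up close to 0
```

The two details that matter most are the linear output layer and applying each activation derivative exactly once in the backward pass; both differ from the code shown in Revision 2 below.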
## Revision 2: Changed the code

Title:

```diff
@@ -1 +1 @@
-Pythonの
+Learning y=x^2 in Python
```
Body:

````diff
@@ -5,59 +5,132 @@
 ### Background
 
 I am studying the basics of how to use NumPy in Python. I am writing code by trial and error, but it is not going well.
-I want the network size to be [10,10,10].
 
 ### Problem / error message
-
-
-ValueError                                Traceback (most recent call last)
-Cell In[7], line 163
-    159 t_batch = t_train[batch_mask]
-    161 # compute the gradients
-    162 #grad = network.numerical_gradient(x_batch, t_batch)
---> 163 grad = network.gradient(x_batch, t_batch)
-    165 # update the parameters
-    166 for key in ('W1', 'b1', 'W2', 'b2'):
-
-Cell In[7], line 92, in TwoLayerNet.gradient(self, x, t)
-     89 batch_num = x.shape[0]
-     91 # forward
----> 92 a1 = np.dot(x, W1) + b1
-     93 z1 = sigmoid(a1)
-     94 a2 = np.dot(z1, W2) + b2
-
-File <__array_function__ internals>:200, in dot(*args, **kwargs)
-
-ValueError: shapes (100,1) and (10,10) not aligned: 1 (dim 1) != 10 (dim 0)
+
+When I plot the training results, the values are far off from y=x^2.
 
 ### Relevant source code
 
 ```python
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+def sigmoid_grad(x):
+    return (1.0 - sigmoid(x)) * sigmoid(x)
+def mean_squared_error(y, t):
+    return 0.5 * np.sum((y - t)**2)
+
+class Adam:
+    def __init__(self, lr=0.01, beta1=0.9, beta2=0.999):
+        self.lr = lr
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.iter = 0
+        self.m = None
+        self.v = None
+
+    def update(self, params, grads):
+        if self.m is None:
+            self.m, self.v = {}, {}
+            for key, val in params.items():
+                self.m[key] = np.zeros_like(val)
+                self.v[key] = np.zeros_like(val)
+
+        self.iter += 1
+        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
+
+        for key in params.keys():
+            #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key]
+            #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2)
+            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
+            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
+
+            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
+
+class Affine:
+    def __init__(self, W, b):
+        self.W = W
+        self.b = b
+
+        self.x = None
+        self.original_x_shape = None
+        # gradients of the weight and bias parameters
+        self.dW = None
+        self.db = None
+
+    def forward(self, x):
+        # tensor support
+        self.original_x_shape = x.shape
+        x = x.reshape(x.shape[0], -1)
+        self.x = x
+
+        out = np.dot(self.x, self.W) + self.b
+
+        return out
+
+    def backward(self, dout):
+        dx = np.dot(dout, self.W.T)
+        self.dW = np.dot(self.x.T, dout)
+        self.db = np.sum(dout, axis=0)
+
+        dx = dx.reshape(*self.original_x_shape)  # restore the input shape (tensor support)
+        return dx
+
+# generate the dataset
+from sklearn.model_selection import train_test_split
+np.random.seed(42)  # seed 42 this time
+x = np.random.uniform(-1, 1, size=(100, 1))
+t = x ** 2
+X = np.arange(-1, 1, 0.01)
+Y = X ** 2
+
+# split the dataset
+x_train, x_val, x_test = np.split(x, [int(len(x)*0.5), int(len(x)*0.75)])
+t_train, t_val, t_test = np.split(t, [int(len(y)*0.5), int(len(y)*0.75)])
+# display the results
+#print("x_train:", x_train.shape)
+#print("x_val:", x_val.shape)
+#print("x_test:", x_test.shape)
+
+import numpy as np
 class TwoLayerNet:
-    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
-        # initialize the weights
+    def sigmoid(x):
+        return 1 / (1 + np.exp(-x))
+    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, weight_init_std=0.01):
+        # initialize the weights
         self.params = {}
-        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
-        self.params['b1'] = np.zeros(hidden_size)
-        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
-        self.params['b2'] = np.zeros(output_size)
+        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size1)
+        self.params['b1'] = np.zeros(hidden_size1)
+        self.params['W2'] = weight_init_std * np.random.randn(hidden_size1, hidden_size2)
+        self.params['b2'] = np.zeros(hidden_size2)
+        self.params['W3'] = weight_init_std * np.random.randn(hidden_size2, hidden_size3)
+        self.params['b3'] = np.zeros(hidden_size3)
+        self.params['W4'] = weight_init_std * np.random.randn(hidden_size3, output_size)
+        self.params['b4'] = np.zeros(output_size)
 
     def predict(self, x):
-        W1, W2 = self.params['W1'], self.params['W2']
-        b1, b2 = self.params['b1'], self.params['b2']
+        W1, W2, W3, W4 = self.params['W1'], self.params['W2'], self.params['W3'], self.params['W4']
+        b1, b2, b3, b4 = self.params['b1'], self.params['b2'], self.params['b3'], self.params['b4']
 
         a1 = np.dot(x, W1) + b1
         z1 = sigmoid(a1)
         a2 = np.dot(z1, W2) + b2
-        y = softmax(a2)
-
+        z2 = sigmoid(a2)
+        a3 = np.dot(z2, W3) + b3
+        z3 = sigmoid(a3)
+        a4 = np.dot(z3, W4) + b4
+        y = (a4)
+
         return y
-
-    # x: input data, t: teacher data
+
+    # x: input data, t: teacher data
     def loss(self, x, t):
        y = self.predict(x)
-
-        return
+        return mean_squared_error(y, t)
 
     def accuracy(self, x, t):
         y = self.predict(x)
````
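An aside on the ValueError that this revision removed from the question (my reading of the traceback, not text from the question): `np.dot(x, W1)` failed because the earlier code built the network with `input_size=10`, so `W1` had shape `(10, 10)`, while the input has one feature per sample, i.e. shape `(N, 1)`. The shapes in the message, `(100,1)` and `(10,10)`, show the mismatch directly. A minimal reproduction and fix sketch:

```python
# Minimal reproduction of the removed ValueError (the shapes are taken from
# the traceback; everything else here is my illustration, not the asker's code).
import numpy as np

x = np.random.uniform(-1, 1, size=(100, 1))  # 100 samples, 1 feature each

W1_bad = 0.01 * np.random.randn(10, 10)      # input_size=10 does not match
try:
    np.dot(x, W1_bad)
except ValueError as e:
    print(e)  # shapes (100,1) and (10,10) not aligned: 1 (dim 1) != 10 (dim 0)

W1_ok = 0.01 * np.random.randn(1, 10)        # input_size=1 matches x.shape[1]
print(np.dot(x, W1_ok).shape)                # (100, 10)
```

Revision 2 accordingly changes the constructor call to `network = TwoLayerNet(input_size=1, ...)`, which resolves this error.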
````diff
@@ -70,46 +143,116 @@
     # x: input data, t: teacher data
     def numerical_gradient(self, x, t):
         loss_W = lambda W: self.loss(x, t)
-
         grads = {}
         grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
         grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
         grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
         grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
+        grads['W3'] = numerical_gradient(loss_W, self.params['W3'])
+        grads['b3'] = numerical_gradient(loss_W, self.params['b3'])
+        grads['W4'] = numerical_gradient(loss_W, self.params['W4'])
+        grads['b4'] = numerical_gradient(loss_W, self.params['b4'])
 
         return grads
-
+
     def gradient(self, x, t):
-        W1, W2 = self.params['W1'], self.params['W2']
-        b1, b2 = self.params['b1'], self.params['b2']
+        W1, W2, W3, W4 = self.params['W1'], self.params['W2'], self.params['W3'], self.params['W4']
+        b1, b2, b3, b4 = self.params['b1'], self.params['b2'], self.params['b3'], self.params['b4']
         grads = {}
-
+
         batch_num = x.shape[0]
-
+
         # forward
         a1 = np.dot(x, W1) + b1
         z1 = sigmoid(a1)
         a2 = np.dot(z1, W2) + b2
-        y = softmax(a2)
-
+        z2 = sigmoid(a2)
+        a3 = np.dot(z2, W3) + b3
+        z3 = sigmoid(a3)
+        a4 = np.dot(z3, W4) + b4
+        y = sigmoid(a4)
+
         # backward
         dy = (y - t) / batch_num
-        grads['W2'] = np.dot(z1.T, dy)
-        grads['b2'] = np.sum(dy, axis=0)
-
-        dz1 = np.dot(dy, W2.T)
-        da1 = sigmoid_grad(a1) * dz1
-        grads['W1'] = np.dot(x.T, da1)
-        grads['b1'] = np.sum(da1, axis=0)
-
-        return
-
-network = TwoLayerNet(input_size=10, hidden_size=10, output_size=10)
+        grads['W4'] = np.dot(z3.T, dy * sigmoid_grad(a4))
+        grads['b4'] = np.sum(dy * sigmoid_grad(a4), axis=0)
+
+        dz3 = np.dot(dy * sigmoid_grad(a4), W4.T)
+        da3 = dz3 * sigmoid_grad(a3)
+        grads['W3'] = np.dot(z2.T, da3 * sigmoid_grad(a3))
+        grads['b3'] = np.sum(da3 * sigmoid_grad(a3), axis=0)
+
+        dz2 = np.dot(da3 * sigmoid_grad(a3), W3.T)
+        da2 = dz2 * sigmoid_grad(a2)
+        grads['W2'] = np.dot(z1.T, da2 * sigmoid_grad(a2))
+        grads['b2'] = np.sum(da2 * sigmoid_grad(a2), axis=0)
+
+        dz1 = np.dot(da2 * sigmoid_grad(a2), W2.T)
+        da1 = dz1 * sigmoid_grad(a1)
+        grads['W1'] = np.dot(x.T, da1 * sigmoid_grad(a1))
+        grads['b1'] = np.sum(da1 * sigmoid_grad(a1), axis=0)
+
+        return grads
+
+network = TwoLayerNet(input_size=1, hidden_size1=10, hidden_size2=10, hidden_size3=10, output_size=1)
+
+# training
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+def sigmoid_grad(x):
+    return (1.0 - sigmoid(x)) * sigmoid(x)
+def mean_squared_error(y, t):
+    return 0.5 * np.sum((y - t)**2)
+def softmax(x):
+    x = x - np.max(x, axis=-1, keepdims=True)  # guard against overflow
+    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)
+
+iters_num = 10000  # set the number of iterations as appropriate
+train_size = x_train.shape[0]
+batch_size = 10
+learning_rate = 0.1
+
+train_loss_list = []
+train_acc_list = []
+test_acc_list = []
+
+iter_per_epoch = max(train_size / batch_size, 1)
+
+for i in range(iters_num):
+    batch_mask = np.random.choice(train_size, batch_size)
+    x_batch = x_train[batch_mask]
+    y_batch = t_train[batch_mask]
+
+    # compute the gradients
+    #grad = network.numerical_gradient(x_batch, y_batch)
+    grad = network.gradient(x_batch, y_batch)
+
+    # update the parameters
+    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3', 'W4', 'b4'):
+        network.params[key] -= learning_rate * grad[key]
+
+    loss = network.loss(x_batch, y_batch)
+    train_loss_list.append(loss)
+
+optimizer = Adam
+y_pred = network.predict(x_test)
+
+import matplotlib.pyplot as plt
+plt.plot(x_test, y_pred, marker='.', ls='', label='y_pred')
+plt.grid()
+plt.xlabel('x_test', fontsize='16')
+plt.ylabel('y_pred', fontsize='16')
+plt.show()
+plt.close()
+
+plt.plot(train_loss_list)
+plt.xlabel('epoc')
+plt.ylabel('loss')
+plt.show()
 ```
-
 ### What I tried
 
-
+I tried to apply the Affine class properly, but I have not really figured out how to use it.
 
 ### Additional information (framework/tool versions, etc.)
 
````
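Two things in this revision's code are worth flagging alongside the history (my reading, not part of the question). First, `predict` returns the raw `a4` while `gradient` backpropagates through `y = sigmoid(a4)`, so the gradients do not match the function being plotted. Second, every layer multiplies `sigmoid_grad` in twice: for example, `da1 = dz1 * sigmoid_grad(a1)` is already the delta at layer 1, yet `grads['W1'] = np.dot(x.T, da1 * sigmoid_grad(a1))` applies the derivative again. There is also a likely `NameError`: the split line `np.split(t, [int(len(y)*0.5), int(len(y)*0.75)])` references `y` before it exists, where `len(t)` was presumably intended. Below is a sketch of a consistent backward pass (my correction, assuming a linear output layer and MSE loss; not the asker's code):

```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)

def gradient(params, x, t):
    """Backprop for the 4-layer net, applying each activation derivative once.

    A sketch of a possible fix; `params` holds 'W1'..'W4' and 'b1'..'b4'
    with the same shapes as in the question's TwoLayerNet.
    """
    W1, W2, W3, W4 = params['W1'], params['W2'], params['W3'], params['W4']
    b1, b2, b3, b4 = params['b1'], params['b2'], params['b3'], params['b4']
    batch_num = x.shape[0]

    # forward: same as predict(), with no sigmoid on the output layer
    a1 = np.dot(x, W1) + b1;  z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2; z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3; z3 = sigmoid(a3)
    y = np.dot(z3, W4) + b4

    # backward: each sigmoid_grad appears exactly once per layer
    grads = {}
    dy = (y - t) / batch_num
    grads['W4'] = np.dot(z3.T, dy)
    grads['b4'] = np.sum(dy, axis=0)
    da3 = np.dot(dy, W4.T) * sigmoid_grad(a3)
    grads['W3'] = np.dot(z2.T, da3)
    grads['b3'] = np.sum(da3, axis=0)
    da2 = np.dot(da3, W3.T) * sigmoid_grad(a2)
    grads['W2'] = np.dot(z1.T, da2)
    grads['b2'] = np.sum(da2, axis=0)
    da1 = np.dot(da2, W2.T) * sigmoid_grad(a1)
    grads['W1'] = np.dot(x.T, da1)
    grads['b1'] = np.sum(da1, axis=0)
    return grads
```

With each `sigmoid_grad` applied exactly once and the forward pass matching `predict`, gradient descent on this architecture can actually reduce the loss.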
## Revision 1: Line breaks

Title: no changes.

Body:

````diff
@@ -32,8 +32,7 @@
 
 ### Relevant source code
 
-
-
+```python
 class TwoLayerNet:
     def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
         # initialize the weights
@@ -106,7 +105,7 @@
         return
 
 network = TwoLayerNet(input_size=10, hidden_size=10, output_size=10)
-
+```
 
 ### What I tried
 
````
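The "What I tried" section mentions the `Affine` class, which the code in Revision 2 defines (together with `Adam`) but never uses. For illustration, here is one way the two could be wired together; the wiring, shapes, and hyperparameters are my assumptions, and the sketch presumes the question's `Affine`, `Adam`, and `sigmoid` definitions are in scope:

```python
# Sketch: driving the question's Affine layers with its Adam optimizer
# (my illustration of how the otherwise-unused classes could fit together;
# assumes Affine, Adam, and sigmoid from the question's code are defined).
import numpy as np

params = {
    'W1': 0.01 * np.random.randn(1, 10), 'b1': np.zeros(10),
    'W2': 0.01 * np.random.randn(10, 1), 'b2': np.zeros(1),
}
layer1 = Affine(params['W1'], params['b1'])
layer2 = Affine(params['W2'], params['b2'])
optimizer = Adam(lr=0.01)  # instantiate, unlike the bare `optimizer = Adam`

x = np.random.uniform(-1, 1, size=(100, 1))
t = x ** 2

for i in range(2000):
    # forward through the layers (sigmoid hidden, linear output)
    z1 = sigmoid(layer1.forward(x))
    y = layer2.forward(z1)

    # backward in reverse order; MSE gradient at the output
    dout = (y - t) / x.shape[0]
    dz1 = layer2.backward(dout)
    layer1.backward(dz1 * z1 * (1 - z1))  # sigmoid'(a) = z * (1 - z)

    grads = {'W1': layer1.dW, 'b1': layer1.db,
             'W2': layer2.dW, 'b2': layer2.db}
    optimizer.update(params, grads)

print(float(np.mean((y - t) ** 2)))  # should shrink as training progresses
```

Because NumPy arrays are passed by reference and `Adam.update` modifies `params[key]` in place, the `Affine` layers see the updated weights without any extra bookkeeping.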