Question edit history

3

Attached images of the training results

2023/04/07 01:32

Posted

aky

Score: 0

test CHANGED
@@ -1 +1 @@
- Learning y=x^2 in Python
+ Learning y=x^2 does not progress
test CHANGED
@@ -9,7 +9,9 @@
  ### Problem / error messages

  When I plot the training results, they are far from the values of y=x^2.
-
+ ![Image description](https://ddjkaamml8q8x.cloudfront.net/questions/2023-04-07/30d8dd18-a2c1-40c6-a9cc-5b210abef75f.png)
+
+ ![Image description](https://ddjkaamml8q8x.cloudfront.net/questions/2023-04-07/47fd7d34-2742-4167-b0df-ea6b33b9dabc.png)
  ### Relevant source code

  ```python

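For reference, one way to see how far the plotted predictions are from the target is to overlay the true curve on the same axes. A minimal sketch, assuming the arrays `X`, `Y`, `x_test`, and `y_pred` from the question's code in the revision below; this is an illustration, not part of the posted code:

```python
import matplotlib.pyplot as plt

# Overlay the model's predictions on the true curve y = x^2.
# Assumes X, Y (the dense true curve) and x_test, y_pred (the model
# output) are already defined as in the question's code.
plt.plot(X, Y, label='y = x^2 (true)')
plt.plot(x_test, y_pred, marker='.', ls='', label='y_pred')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid()
plt.show()
```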
2

Changed the code

2023/04/06 06:29

Posted

aky

Score: 0

test CHANGED
@@ -1 +1 @@
- Question about the network size in Python
+ Learning y=x^2 in Python
test CHANGED
@@ -5,59 +5,132 @@
  ### Background

  I am studying basic NumPy usage in Python. I am writing code by trial and error, but it is not going well.
- I want the network size to be [10,10,10].

  ### Problem / error messages
+
- The input dimensionality seems to be wrong, but I do not know where to fix it
+ When I plot the training results, they are far from the values of y=x^2.
-
- ValueError                                Traceback (most recent call last)
- Cell In[7], line 163
-     159 t_batch = t_train[batch_mask]
-     161 # Compute the gradient
-     162 #grad = network.numerical_gradient(x_batch, t_batch)
- --> 163 grad = network.gradient(x_batch, t_batch)
-     165 # Update the parameters
-     166 for key in ('W1', 'b1', 'W2', 'b2'):
-
- Cell In[7], line 92, in TwoLayerNet.gradient(self, x, t)
-      89 batch_num = x.shape[0]
-      91 # forward
- ---> 92 a1 = np.dot(x, W1) + b1
-      93 z1 = sigmoid(a1)
-      94 a2 = np.dot(z1, W2) + b2
-
- File <__array_function__ internals>:200, in dot(*args, **kwargs)
-
- ValueError: shapes (100,1) and (10,10) not aligned: 1 (dim 1) != 10 (dim 0)

  ### Relevant source code

  ```python
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from sklearn.model_selection import train_test_split
+
+ def sigmoid(x):
+     return 1 / (1 + np.exp(-x))
+ def sigmoid_grad(x):
+     return (1.0 - sigmoid(x)) * sigmoid(x)
+ def mean_squared_error(y, t):
+     return 0.5 * np.sum((y - t)**2)
+
+ class Adam:
+     def __init__(self, lr=0.01, beta1=0.9, beta2=0.999):
+         self.lr = lr
+         self.beta1 = beta1
+         self.beta2 = beta2
+         self.iter = 0
+         self.m = None
+         self.v = None
+
+     def update(self, params, grads):
+         if self.m is None:
+             self.m, self.v = {}, {}
+             for key, val in params.items():
+                 self.m[key] = np.zeros_like(val)
+                 self.v[key] = np.zeros_like(val)
+
+         self.iter += 1
+         lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
+
+         for key in params.keys():
+             #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key]
+             #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2)
+             self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
+             self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
+
+             params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
+
+ class Affine:
+     def __init__(self, W, b):
+         self.W = W
+         self.b = b
+
+         self.x = None
+         self.original_x_shape = None
+         # Gradients of the weight and bias parameters
+         self.dW = None
+         self.db = None
+
+     def forward(self, x):
+         # Tensor support
+         self.original_x_shape = x.shape
+         x = x.reshape(x.shape[0], -1)
+         self.x = x
+
+         out = np.dot(self.x, self.W) + self.b
+
+         return out
+
+     def backward(self, dout):
+         dx = np.dot(dout, self.W.T)
+         self.dW = np.dot(self.x.T, dout)
+         self.db = np.sum(dout, axis=0)
+
+         dx = dx.reshape(*self.original_x_shape)  # restore the input data shape (tensor support)
+         return dx
+
+ # Generate the dataset
+ from sklearn.model_selection import train_test_split
+ np.random.seed(42)  # seed 42 this time
+ x = np.random.uniform(-1, 1, size=(100, 1))
+ t = x ** 2
+ X = np.arange(-1, 1, 0.01)
+ Y = X ** 2
+
+ # Split the dataset
+ x_train, x_val, x_test = np.split(x, [int(len(x)*0.5), int(len(x)*0.75)])
+ t_train, t_val, t_test = np.split(t, [int(len(y)*0.5), int(len(y)*0.75)])
+ # Display the results
+ #print("x_train:", x_train.shape)
+ #print("x_val:", x_val.shape)
+ #print("x_test:", x_test.shape)
+
+ import numpy as np
  class TwoLayerNet:
+     def sigmoid(x):
+         return 1 / (1 + np.exp(-x))
-     def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
+     def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, weight_init_std=0.01):
-         # Initialize the weights
+         # Initialize the weights
          self.params = {}
-         self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
+         self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size1)
-         self.params['b1'] = np.zeros(hidden_size)
+         self.params['b1'] = np.zeros(hidden_size1)
+         self.params['W2'] = weight_init_std * np.random.randn(hidden_size1, hidden_size2)
+         self.params['b2'] = np.zeros(hidden_size2)
+         self.params['W3'] = weight_init_std * np.random.randn(hidden_size2, hidden_size3)
+         self.params['b3'] = np.zeros(hidden_size3)
-         self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
+         self.params['W4'] = weight_init_std * np.random.randn(hidden_size3, output_size)
-         self.params['b2'] = np.zeros(output_size)
+         self.params['b4'] = np.zeros(output_size)

      def predict(self, x):
-         W1, W2 = self.params['W1'], self.params['W2']
+         W1, W2, W3, W4 = self.params['W1'], self.params['W2'], self.params['W3'], self.params['W4']
-         b1, b2 = self.params['b1'], self.params['b2']
+         b1, b2, b3, b4 = self.params['b1'], self.params['b2'], self.params['b3'], self.params['b4']

          a1 = np.dot(x, W1) + b1
          z1 = sigmoid(a1)
          a2 = np.dot(z1, W2) + b2
-         y = softmax(a2)
+         z2 = sigmoid(a2)
-
+         a3 = np.dot(z2, W3) + b3
+         z3 = sigmoid(a3)
+         a4 = np.dot(z3, W4) + b4
+         y = (a4)
+
          return y
-
+
-     # x: input data, t: teacher data
+     # x: input data, t: teacher data
      def loss(self, x, t):
          y = self.predict(x)
-
-         return cross_entropy_error(y, t)
+         return mean_squared_error(y, t)

      def accuracy(self, x, t):
          y = self.predict(x)
@@ -70,46 +143,116 @@
      # x: input data, t: teacher data
      def numerical_gradient(self, x, t):
          loss_W = lambda W: self.loss(x, t)
-
          grads = {}
          grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
          grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
          grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
          grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
+         grads['W3'] = numerical_gradient(loss_W, self.params['W3'])
+         grads['b3'] = numerical_gradient(loss_W, self.params['b3'])
+         grads['W4'] = numerical_gradient(loss_W, self.params['W4'])
+         grads['b4'] = numerical_gradient(loss_W, self.params['b4'])

          return grads
-
+
      def gradient(self, x, t):
-         W1, W2 = self.params['W1'], self.params['W2']
+         W1, W2, W3, W4 = self.params['W1'], self.params['W2'], self.params['W3'], self.params['W4']
-         b1, b2 = self.params['b1'], self.params['b2']
+         b1, b2, b3, b4 = self.params['b1'], self.params['b2'], self.params['b3'], self.params['b4']
          grads = {}
-
+
          batch_num = x.shape[0]
-
+
          # forward
          a1 = np.dot(x, W1) + b1
          z1 = sigmoid(a1)
          a2 = np.dot(z1, W2) + b2
+         z2 = sigmoid(a2)
+         a3 = np.dot(z2, W3) + b3
+         z3 = sigmoid(a3)
+         a4 = np.dot(z3, W4) + b4
-         y = softmax(a2)
+         y = sigmoid(a4)
-
+
          # backward
          dy = (y - t) / batch_num
-         grads['W2'] = np.dot(z1.T, dy)
+         grads['W4'] = np.dot(z3.T, dy * sigmoid_grad(a4))
-         grads['b2'] = np.sum(dy, axis=0)
+         grads['b4'] = np.sum(dy * sigmoid_grad(a4), axis=0)
+
-
+         dz3 = np.dot(dy * sigmoid_grad(a4), W4.T)
+         da3 = dz3 * sigmoid_grad(a3)
+         grads['W3'] = np.dot(z2.T, da3 * sigmoid_grad(a3))
+         grads['b3'] = np.sum(da3 * sigmoid_grad(a3), axis=0)
+
+         dz2 = np.dot(da3 * sigmoid_grad(a3), W3.T)
+         da2 = dz2 * sigmoid_grad(a2)
+         grads['W2'] = np.dot(z1.T, da2 * sigmoid_grad(a2))
+         grads['b2'] = np.sum(da2 * sigmoid_grad(a2), axis=0)
+
-         dz1 = np.dot(dy, W2.T)
+         dz1 = np.dot(da2 * sigmoid_grad(a2), W2.T)
-         da1 = sigmoid_grad(a1) * dz1
+         da1 = dz1 * sigmoid_grad(a1)
-         grads['W1'] = np.dot(x.T, da1)
+         grads['W1'] = np.dot(x.T, da1 * sigmoid_grad(a1))
-         grads['b1'] = np.sum(da1, axis=0)
+         grads['b1'] = np.sum(da1 * sigmoid_grad(a1), axis=0)
-
+
-         return
+         return grads
-
+
- network = TwoLayerNet(input_size=10, hidden_size=10, output_size=10)
+ network = TwoLayerNet(input_size=1, hidden_size1=10, hidden_size2=10, hidden_size3=10, output_size=1)
+
+ # Training
+ def sigmoid(x):
+     return 1 / (1 + np.exp(-x))
+ def sigmoid_grad(x):
+     return (1.0 - sigmoid(x)) * sigmoid(x)
+ def mean_squared_error(y, t):
+     return 0.5 * np.sum((y - t)**2)
+ def softmax(x):
+     x = x - np.max(x, axis=-1, keepdims=True)  # guard against overflow
+     return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)
+
+ iters_num = 10000  # set the number of iterations as appropriate
+ train_size = x_train.shape[0]
+ batch_size = 10
+ learning_rate = 0.1
+
+ train_loss_list = []
+ train_acc_list = []
+ test_acc_list = []
+
+ iter_per_epoch = max(train_size / batch_size, 1)
+
+ for i in range(iters_num):
+     batch_mask = np.random.choice(train_size, batch_size)
+     x_batch = x_train[batch_mask]
+     y_batch = t_train[batch_mask]
+
+     # Compute the gradient
+     #grad = network.numerical_gradient(x_batch, y_batch)
+     grad = network.gradient(x_batch, y_batch)
+
+     # Update the parameters
+     for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3', 'W4', 'b4'):
+         network.params[key] -= learning_rate * grad[key]
+
+     loss = network.loss(x_batch, y_batch)
+     train_loss_list.append(loss)
+
+ optimizer = Adam
+ y_pred = network.predict(x_test)
+
+ import matplotlib.pyplot as plt
+ plt.plot(x_test,y_pred,marker='.',ls='',label='y_pred')
+ plt.grid()
+ plt.xlabel('x_test',fontsize='16')
+ plt.ylabel('y_pred',fontsize='16')
+ plt.show()
+ plt.close()
+
+ plt.plot(train_loss_list)
+ plt.xlabel('epoc')
+ plt.ylabel('loss')
+ plt.show()
  ```
-
  ### What I tried

- I tried things like adding W3 and b3 to increase the number of layers, but it did not work
+ I tried to apply Affine properly, but I do not really understand how to use it

  ### Additional information (FW/tool versions, etc.)


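Regarding the "What I tried" note in this revision: for illustration only, a minimal sketch of how a layer class with the `forward`/`backward` interface of the question's `Affine` is usually chained. It assumes the `Affine` class defined in the revision above; the two-layer shape (1 → 10 → 1) and the hand-written sigmoid step are assumptions, not the author's code:

```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Hypothetical wiring of two Affine layers (sizes are assumptions)
affine1 = Affine(0.01 * np.random.randn(1, 10), np.zeros(10))
affine2 = Affine(0.01 * np.random.randn(10, 1), np.zeros(1))

x = np.random.uniform(-1, 1, size=(5, 1))
t = x ** 2

# forward: each layer's output feeds the next layer's input
a1 = affine1.forward(x)
z1 = sigmoid(a1)
out = affine2.forward(z1)

# backward: propagate the MSE gradient in reverse order
dout = (out - t) / x.shape[0]
dz1 = affine2.backward(dout)                  # fills affine2.dW / affine2.db
da1 = dz1 * sigmoid(a1) * (1 - sigmoid(a1))   # derivative of the sigmoid step
_ = affine1.backward(da1)                     # fills affine1.dW / affine1.db
```

After each `backward` call the layer holds its gradients in `dW` and `db`, which could then be collected into the dictionaries that the question's `Adam.update(params, grads)` expects.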
1

Line break

2023/04/06 02:06

Posted

aky

Score: 0

test CHANGED
File without changes
test CHANGED
@@ -32,8 +32,7 @@

  ### Relevant source code

- Python
+ ```python
-
  class TwoLayerNet:
      def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
          # Initialize the weights
@@ -106,7 +105,7 @@
          return

  network = TwoLayerNet(input_size=10, hidden_size=10, output_size=10)
-
+ ```

  ### What I tried

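As background on the `ValueError` that the 2023/04/06 06:29 revision removed: `np.dot` requires the inner dimensions to agree, so with inputs of shape `(100, 1)` the first weight matrix must have one row, which is why `input_size` was later changed from 10 to 1. A minimal standalone sketch of the rule, not taken from the question's code:

```python
import numpy as np

x = np.random.uniform(-1, 1, size=(100, 1))  # 100 samples, 1 feature each

W1_bad = np.random.randn(10, 10)
# np.dot(x, W1_bad) would raise:
# ValueError: shapes (100,1) and (10,10) not aligned: 1 (dim 1) != 10 (dim 0)

W1_good = np.random.randn(1, 10)             # rows must equal x.shape[1]
a1 = np.dot(x, W1_good)
print(a1.shape)                              # (100, 10)
```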