# Question edit history

## Revision 3: Attached an image of the training results
Title:

```diff
@@ -1 +1 @@
-Learning y=x^2 in Python
+Training on y=x^2 does not progress
```
Body:

````diff
@@ -9,7 +9,9 @@
 ### Problem / error message
 
 When I plot the training results, the values are far off from y=x^2.
-
+
+[image: plot of the training results]
+
 ### Relevant source code
 
 ```python
````
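For a point of comparison with "training does not progress", below is a minimal self-contained reference sketch of my own (the 1-32-1 architecture, tanh activation, seed, learning rate, and iteration count are all assumptions, not taken from the question). A small network with a linear output layer, trained by plain full-batch gradient descent on mean squared error, does fit y=x^2 on data generated the same way:

```python
# A minimal reference MLP for regression on y = x^2 (assumed setup:
# 1 -> 32 -> 1, tanh hidden layer, linear output, full-batch gradient descent).
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1, 1, size=(100, 1))
t = x ** 2

W1 = rng.normal(0.0, 0.5, size=(1, 32)); b1 = np.zeros(32)
W2 = rng.normal(0.0, 0.5, size=(32, 1)); b2 = np.zeros(1)
lr = 0.1

for i in range(5000):
    # forward: the output layer is linear, so no activation on the last layer
    a1 = x @ W1 + b1
    z1 = np.tanh(a1)
    y = z1 @ W2 + b2

    # backward for L = 0.5 * mean((y - t)^2); each derivative applied once
    dy = (y - t) / x.shape[0]
    dW2 = z1.T @ dy
    db2 = dy.sum(axis=0)
    dz1 = dy @ W2.T
    da1 = dz1 * (1.0 - z1 ** 2)  # tanh'(a1) = 1 - tanh(a1)^2
    dW1 = x.T @ da1
    db1 = da1.sum(axis=0)

    W1 -= lr * dW1; b1 -= lr * db1
    W2 -= lr * dW2; b2 -= lr * db2

print(float(np.mean((y - t) ** 2)))  # should end up close to 0
```

The two details that matter most are the linear output layer and applying each activation derivative exactly once in the backward pass; both differ from the code shown in Revision 2 below.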
## Revision 2: Changed the code

Title:

```diff
@@ -1 +1 @@
-Pythonの
+Learning y=x^2 in Python
```
Body:

````diff
@@ -5,59 +5,132 @@
 ### Background
 
 I am studying the basics of how to use NumPy in Python. I am writing code by trial and error, but it is not going well.
-I want the network size to be [10,10,10].
 
 ### Problem / error message
-
-
-ValueError                                Traceback (most recent call last)
-Cell In[7], line 163
-    159 t_batch = t_train[batch_mask]
-    161 # compute the gradients
-    162 #grad = network.numerical_gradient(x_batch, t_batch)
---> 163 grad = network.gradient(x_batch, t_batch)
-    165 # update the parameters
-    166 for key in ('W1', 'b1', 'W2', 'b2'):
-
-Cell In[7], line 92, in TwoLayerNet.gradient(self, x, t)
-     89 batch_num = x.shape[0]
-     91 # forward
----> 92 a1 = np.dot(x, W1) + b1
-     93 z1 = sigmoid(a1)
-     94 a2 = np.dot(z1, W2) + b2
-
-File <__array_function__ internals>:200, in dot(*args, **kwargs)
-
-ValueError: shapes (100,1) and (10,10) not aligned: 1 (dim 1) != 10 (dim 0)
+
+When I plot the training results, the values are far off from y=x^2.
 
 ### Relevant source code
 
 ```python
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+def sigmoid_grad(x):
+    return (1.0 - sigmoid(x)) * sigmoid(x)
+def mean_squared_error(y, t):
+    return 0.5 * np.sum((y - t)**2)
+
+class Adam:
+    def __init__(self, lr=0.01, beta1=0.9, beta2=0.999):
+        self.lr = lr
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.iter = 0
+        self.m = None
+        self.v = None
+
+    def update(self, params, grads):
+        if self.m is None:
+            self.m, self.v = {}, {}
+            for key, val in params.items():
+                self.m[key] = np.zeros_like(val)
+                self.v[key] = np.zeros_like(val)
+
+        self.iter += 1
+        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
+
+        for key in params.keys():
+            #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key]
+            #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2)
+            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
+            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
+
+            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
+
+class Affine:
+    def __init__(self, W, b):
+        self.W = W
+        self.b = b
+
+        self.x = None
+        self.original_x_shape = None
+        # gradients of the weight and bias parameters
+        self.dW = None
+        self.db = None
+
+    def forward(self, x):
+        # tensor support
+        self.original_x_shape = x.shape
+        x = x.reshape(x.shape[0], -1)
+        self.x = x
+
+        out = np.dot(self.x, self.W) + self.b
+
+        return out
+
+    def backward(self, dout):
+        dx = np.dot(dout, self.W.T)
+        self.dW = np.dot(self.x.T, dout)
+        self.db = np.sum(dout, axis=0)
+
+        dx = dx.reshape(*self.original_x_shape)  # restore the input shape (tensor support)
+        return dx
+
+# generate the dataset
+from sklearn.model_selection import train_test_split
+np.random.seed(42)  # seed 42 this time
+x = np.random.uniform(-1, 1, size=(100, 1))
+t = x ** 2
+X = np.arange(-1, 1, 0.01)
+Y = X ** 2
+
+# split the dataset
+x_train, x_val, x_test = np.split(x, [int(len(x)*0.5), int(len(x)*0.75)])
+t_train, t_val, t_test = np.split(t, [int(len(y)*0.5), int(len(y)*0.75)])
+# display the results
+#print("x_train:", x_train.shape)
+#print("x_val:", x_val.shape)
+#print("x_test:", x_test.shape)
+
+import numpy as np
 class TwoLayerNet:
-    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
-        # initialize the weights
+    def sigmoid(x):
+        return 1 / (1 + np.exp(-x))
+    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, weight_init_std=0.01):
+        # initialize the weights
         self.params = {}
-        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
-        self.params['b1'] = np.zeros(hidden_size)
-        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
-        self.params['b2'] = np.zeros(output_size)
+        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size1)
+        self.params['b1'] = np.zeros(hidden_size1)
+        self.params['W2'] = weight_init_std * np.random.randn(hidden_size1, hidden_size2)
+        self.params['b2'] = np.zeros(hidden_size2)
+        self.params['W3'] = weight_init_std * np.random.randn(hidden_size2, hidden_size3)
+        self.params['b3'] = np.zeros(hidden_size3)
+        self.params['W4'] = weight_init_std * np.random.randn(hidden_size3, output_size)
+        self.params['b4'] = np.zeros(output_size)
 
     def predict(self, x):
-        W1, W2 = self.params['W1'], self.params['W2']
-        b1, b2 = self.params['b1'], self.params['b2']
+        W1, W2, W3, W4 = self.params['W1'], self.params['W2'], self.params['W3'], self.params['W4']
+        b1, b2, b3, b4 = self.params['b1'], self.params['b2'], self.params['b3'], self.params['b4']
 
         a1 = np.dot(x, W1) + b1
         z1 = sigmoid(a1)
         a2 = np.dot(z1, W2) + b2
-        y = softmax(a2)
-
+        z2 = sigmoid(a2)
+        a3 = np.dot(z2, W3) + b3
+        z3 = sigmoid(a3)
+        a4 = np.dot(z3, W4) + b4
+        y = (a4)
+
         return y
-
-    # x: input data, t: teacher data
+
+    # x: input data, t: teacher data
     def loss(self, x, t):
        y = self.predict(x)
-
-        return
+        return mean_squared_error(y, t)
 
     def accuracy(self, x, t):
         y = self.predict(x)
````
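An aside on the ValueError that this revision removed from the question (my reading of the traceback, not text from the question): `np.dot(x, W1)` failed because the earlier code built the network with `input_size=10`, so `W1` had shape `(10, 10)`, while the input has one feature per sample, i.e. shape `(N, 1)`. The shapes in the message, `(100,1)` and `(10,10)`, show the mismatch directly. A minimal reproduction and fix sketch:

```python
# Minimal reproduction of the removed ValueError (the shapes are taken from
# the traceback; everything else here is my illustration, not the asker's code).
import numpy as np

x = np.random.uniform(-1, 1, size=(100, 1))  # 100 samples, 1 feature each

W1_bad = 0.01 * np.random.randn(10, 10)      # input_size=10 does not match
try:
    np.dot(x, W1_bad)
except ValueError as e:
    print(e)  # shapes (100,1) and (10,10) not aligned: 1 (dim 1) != 10 (dim 0)

W1_ok = 0.01 * np.random.randn(1, 10)        # input_size=1 matches x.shape[1]
print(np.dot(x, W1_ok).shape)                # (100, 10)
```

Revision 2 accordingly changes the constructor call to `network = TwoLayerNet(input_size=1, ...)`, which resolves this error.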
````diff
@@ -70,46 +143,116 @@
     # x: input data, t: teacher data
     def numerical_gradient(self, x, t):
         loss_W = lambda W: self.loss(x, t)
-
         grads = {}
         grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
         grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
         grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
         grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
+        grads['W3'] = numerical_gradient(loss_W, self.params['W3'])
+        grads['b3'] = numerical_gradient(loss_W, self.params['b3'])
+        grads['W4'] = numerical_gradient(loss_W, self.params['W4'])
+        grads['b4'] = numerical_gradient(loss_W, self.params['b4'])
 
         return grads
-
+
     def gradient(self, x, t):
-        W1, W2 = self.params['W1'], self.params['W2']
-        b1, b2 = self.params['b1'], self.params['b2']
+        W1, W2, W3, W4 = self.params['W1'], self.params['W2'], self.params['W3'], self.params['W4']
+        b1, b2, b3, b4 = self.params['b1'], self.params['b2'], self.params['b3'], self.params['b4']
         grads = {}
-
+
         batch_num = x.shape[0]
-
+
         # forward
         a1 = np.dot(x, W1) + b1
         z1 = sigmoid(a1)
         a2 = np.dot(z1, W2) + b2
-        y = softmax(a2)
-
+        z2 = sigmoid(a2)
+        a3 = np.dot(z2, W3) + b3
+        z3 = sigmoid(a3)
+        a4 = np.dot(z3, W4) + b4
+        y = sigmoid(a4)
+
         # backward
         dy = (y - t) / batch_num
-        grads['W2'] = np.dot(z1.T, dy)
-        grads['b2'] = np.sum(dy, axis=0)
-
-        dz1 = np.dot(dy, W2.T)
-        da1 = sigmoid_grad(a1) * dz1
-        grads['W1'] = np.dot(x.T, da1)
-        grads['b1'] = np.sum(da1, axis=0)
-
-        return
-
-network = TwoLayerNet(input_size=10, hidden_size=10, output_size=10)
+        grads['W4'] = np.dot(z3.T, dy * sigmoid_grad(a4))
+        grads['b4'] = np.sum(dy * sigmoid_grad(a4), axis=0)
+
+        dz3 = np.dot(dy * sigmoid_grad(a4), W4.T)
+        da3 = dz3 * sigmoid_grad(a3)
+        grads['W3'] = np.dot(z2.T, da3 * sigmoid_grad(a3))
+        grads['b3'] = np.sum(da3 * sigmoid_grad(a3), axis=0)
+
+        dz2 = np.dot(da3 * sigmoid_grad(a3), W3.T)
+        da2 = dz2 * sigmoid_grad(a2)
+        grads['W2'] = np.dot(z1.T, da2 * sigmoid_grad(a2))
+        grads['b2'] = np.sum(da2 * sigmoid_grad(a2), axis=0)
+
+        dz1 = np.dot(da2 * sigmoid_grad(a2), W2.T)
+        da1 = dz1 * sigmoid_grad(a1)
+        grads['W1'] = np.dot(x.T, da1 * sigmoid_grad(a1))
+        grads['b1'] = np.sum(da1 * sigmoid_grad(a1), axis=0)
+
+        return grads
+
+network = TwoLayerNet(input_size=1, hidden_size1=10, hidden_size2=10, hidden_size3=10, output_size=1)
+
+# training
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+def sigmoid_grad(x):
+    return (1.0 - sigmoid(x)) * sigmoid(x)
+def mean_squared_error(y, t):
+    return 0.5 * np.sum((y - t)**2)
+def softmax(x):
+    x = x - np.max(x, axis=-1, keepdims=True)  # guard against overflow
+    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)
+
+iters_num = 10000  # set the number of iterations as appropriate
+train_size = x_train.shape[0]
+batch_size = 10
+learning_rate = 0.1
+
+train_loss_list = []
+train_acc_list = []
+test_acc_list = []
+
+iter_per_epoch = max(train_size / batch_size, 1)
+
+for i in range(iters_num):
+    batch_mask = np.random.choice(train_size, batch_size)
+    x_batch = x_train[batch_mask]
+    y_batch = t_train[batch_mask]
+
+    # compute the gradients
+    #grad = network.numerical_gradient(x_batch, y_batch)
+    grad = network.gradient(x_batch, y_batch)
+
+    # update the parameters
+    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3', 'W4', 'b4'):
+        network.params[key] -= learning_rate * grad[key]
+
+    loss = network.loss(x_batch, y_batch)
+    train_loss_list.append(loss)
+
+optimizer = Adam
+y_pred = network.predict(x_test)
+
+import matplotlib.pyplot as plt
+plt.plot(x_test, y_pred, marker='.', ls='', label='y_pred')
+plt.grid()
+plt.xlabel('x_test', fontsize='16')
+plt.ylabel('y_pred', fontsize='16')
+plt.show()
+plt.close()
+
+plt.plot(train_loss_list)
+plt.xlabel('epoc')
+plt.ylabel('loss')
+plt.show()
 ```
-
 ### What I tried
 
-
+I tried to apply the Affine class properly, but I have not really figured out how to use it.
 
 ### Additional information (framework/tool versions, etc.)
 
````
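Two things in this revision's code are worth flagging alongside the history (my reading, not part of the question). First, `predict` returns the raw `a4` while `gradient` backpropagates through `y = sigmoid(a4)`, so the gradients do not match the function being plotted. Second, every layer multiplies `sigmoid_grad` in twice: for example, `da1 = dz1 * sigmoid_grad(a1)` is already the delta at layer 1, yet `grads['W1'] = np.dot(x.T, da1 * sigmoid_grad(a1))` applies the derivative again. There is also a likely `NameError`: the split line `np.split(t, [int(len(y)*0.5), int(len(y)*0.75)])` references `y` before it exists, where `len(t)` was presumably intended. Below is a sketch of a consistent backward pass (my correction, assuming a linear output layer and MSE loss; not the asker's code):

```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)

def gradient(params, x, t):
    """Backprop for the 4-layer net, applying each activation derivative once.

    A sketch of a possible fix; `params` holds 'W1'..'W4' and 'b1'..'b4'
    with the same shapes as in the question's TwoLayerNet.
    """
    W1, W2, W3, W4 = params['W1'], params['W2'], params['W3'], params['W4']
    b1, b2, b3, b4 = params['b1'], params['b2'], params['b3'], params['b4']
    batch_num = x.shape[0]

    # forward: same as predict(), with no sigmoid on the output layer
    a1 = np.dot(x, W1) + b1;  z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2; z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3; z3 = sigmoid(a3)
    y = np.dot(z3, W4) + b4

    # backward: each sigmoid_grad appears exactly once per layer
    grads = {}
    dy = (y - t) / batch_num
    grads['W4'] = np.dot(z3.T, dy)
    grads['b4'] = np.sum(dy, axis=0)
    da3 = np.dot(dy, W4.T) * sigmoid_grad(a3)
    grads['W3'] = np.dot(z2.T, da3)
    grads['b3'] = np.sum(da3, axis=0)
    da2 = np.dot(da3, W3.T) * sigmoid_grad(a2)
    grads['W2'] = np.dot(z1.T, da2)
    grads['b2'] = np.sum(da2, axis=0)
    da1 = np.dot(da2, W2.T) * sigmoid_grad(a1)
    grads['W1'] = np.dot(x.T, da1)
    grads['b1'] = np.sum(da1, axis=0)
    return grads
```

With each `sigmoid_grad` applied exactly once and the forward pass matching `predict`, gradient descent on this architecture can actually reduce the loss.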
## Revision 1: Line breaks

Title: no changes.

Body:

````diff
@@ -32,8 +32,7 @@
 
 ### Relevant source code
 
-
-
+```python
 class TwoLayerNet:
     def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
         # initialize the weights
@@ -106,7 +105,7 @@
         return
 
 network = TwoLayerNet(input_size=10, hidden_size=10, output_size=10)
-
+```
 
 ### What I tried
 
````
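The "What I tried" section mentions the `Affine` class, which the code in Revision 2 defines (together with `Adam`) but never uses. For illustration, here is one way the two could be wired together; the wiring, shapes, and hyperparameters are my assumptions, and the sketch presumes the question's `Affine`, `Adam`, and `sigmoid` definitions are in scope:

```python
# Sketch: driving the question's Affine layers with its Adam optimizer
# (my illustration of how the otherwise-unused classes could fit together;
# assumes Affine, Adam, and sigmoid from the question's code are defined).
import numpy as np

params = {
    'W1': 0.01 * np.random.randn(1, 10), 'b1': np.zeros(10),
    'W2': 0.01 * np.random.randn(10, 1), 'b2': np.zeros(1),
}
layer1 = Affine(params['W1'], params['b1'])
layer2 = Affine(params['W2'], params['b2'])
optimizer = Adam(lr=0.01)  # instantiate, unlike the bare `optimizer = Adam`

x = np.random.uniform(-1, 1, size=(100, 1))
t = x ** 2

for i in range(2000):
    # forward through the layers (sigmoid hidden, linear output)
    z1 = sigmoid(layer1.forward(x))
    y = layer2.forward(z1)

    # backward in reverse order; MSE gradient at the output
    dout = (y - t) / x.shape[0]
    dz1 = layer2.backward(dout)
    layer1.backward(dz1 * z1 * (1 - z1))  # sigmoid'(a) = z * (1 - z)

    grads = {'W1': layer1.dW, 'b1': layer1.db,
             'W2': layer2.dW, 'b2': layer2.db}
    optimizer.update(params, grads)

print(float(np.mean((y - t) ** 2)))  # should shrink as training progresses
```

Because NumPy arrays are passed by reference and `Adam.update` modifies `params[key]` in place, the `Affine` layers see the updated weights without any extra bookkeeping.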