I'm writing my own neural network. As a first exercise I tried to teach it logic gates, so I built a three-layer network with 2 input nodes, 30 hidden nodes, and 1 output node. The weights are initialized uniformly at random between -1.0 and 1.0. The learning rate is 0.03, and one training pass presents the inputs (1,1), (0,1), (1,0), (0,0) with their target answers in that order; this is repeated 1000 times.
With this setup AND and OR were learned correctly (or so it seems), but training XOR (EXOR) did not go well.
In the results below, the pair on the left is the input and the value on the right is the network's output. Here are a few runs:
AND
(1,1) = 0.750126
(1,0) = 0.188411
(0,1) = 0.187444
(0,0) = 0.00650456
OR
(1,1) = 0.982309
(1,0) = 0.887062
(0,1) = 0.891423
(0,0) = 0.173294
XOR
(1,1) = 0.511378
(1,0) = 0.510599
(0,1) = 0.500787
(0,0) = 0.48574
EXOR
(1,1) = 0.514631
(1,0) = 0.500916
(0,1) = 0.497562
(0,0) = 0.493797
// output = first input (the case the pasted main.cpp below trains)
(1,1) = 0.903502
(1,0) = 0.926415
(0,1) = 0.0982237
(0,0) = 0.0923838
It looks as if the network only learns correctly when the answers can be separated by a straight line in the input plane.
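To make that concrete: XOR puts (0,0) and (1,1) in one class and (1,0) and (0,1) in the other, and no single straight line can split the plane that way. A quick brute-force illustration of this, independent of my network code (the grid range and step are arbitrary choices for this sketch):

```c++
#include <iostream>

int main() {
	// The four inputs and XOR targets.
	const int x[4][2] = { {1,1}, {1,0}, {0,1}, {0,0} };
	const int t[4]    = {   0,     1,     1,     0   };
	// Try every line w0*a + w1*b + bias >= 0 on a coarse grid; XOR is never matched,
	// because the function is not linearly separable.
	for (double w0 = -2; w0 <= 2; w0 += 0.25)
		for (double w1 = -2; w1 <= 2; w1 += 0.25)
			for (double bias = -2; bias <= 2; bias += 0.25) {
				bool ok = true;
				for (int k = 0; k < 4; k++)
					if ((w0 * x[k][0] + w1 * x[k][1] + bias >= 0) != (t[k] == 1)) { ok = false; break; }
				if (ok) { std::cout << "separating line found\n"; return 0; }
			}
	std::cout << "no single line reproduces XOR\n";
}
```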
What do you think the problem is? For reference, I'm pasting the full code below.
```c++
//main.cpp
#include <array>
#include <iostream>
#include "neural.h"

int main() {
	NeuralNetwork<2, 30, 1> logic(0.03);
	std::array<double, 1> result;
	for (size_t i = 0; i < 1000; i++)
	{
		// Targets: output = first input (the last result set above).
		logic.study({ {1,1} }, { {1} });
		logic.study({ {1,0} }, { {1} });
		logic.study({ {0,1} }, { {0} });
		logic.study({ {0,0} }, { {0} });
	}
	result = logic.calculate({ {1,1} });
	std::cout << "(1,1) = " << result[0] << std::endl;
	result = logic.calculate({ {1,0} });
	std::cout << "(1,0) = " << result[0] << std::endl;
	result = logic.calculate({ {0,1} });
	std::cout << "(0,1) = " << result[0] << std::endl;
	result = logic.calculate({ {0,0} });
	std::cout << "(0,0) = " << result[0] << std::endl;
}
```
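One note on the paste: the study() targets above are the ones from the "output = first input" run. For the XOR/EXOR runs I swapped in the XOR truth table, i.e. something like:

```c++
// XOR targets (for the XOR/EXOR results reported above):
logic.study({ {1,1} }, { {0} });
logic.study({ {1,0} }, { {1} });
logic.study({ {0,1} }, { {1} });
logic.study({ {0,0} }, { {0} });
```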
```c++
//neural.h
#pragma once
#include <random>
#include <cmath>
#include <sstream>
#include <array>
#include <functional>

template <typename _Ty, size_t _column, size_t _row> class matrix {
private:
	std::array<std::array<_Ty, _row>, _column> content;
public:
	// Apply doeach to every element in row-major order; doeachendofcolumn runs after each row.
	static void foreach(std::function<void(size_t colmun, size_t row)> doeach, std::function<void(size_t colmun)> doeachendofcolumn = [](size_t colmun) {}) {
		for (size_t i = 0; i < _column; i++)
		{
			for (size_t j = 0; j < _row; j++)
			{
				doeach(i, j);
			}
			doeachendofcolumn(i);
		}
	}
	matrix() { foreach([&](size_t i, size_t j) {content[i][j] = _Ty(); }); };
	matrix(std::array<std::array<_Ty, _row>, _column> i) { content = i; }
	constexpr size_t row() const { return _row; }
	constexpr size_t column() const { return _column; }
	matrix<_Ty, _row, _column> transpose() const { matrix<_Ty, _row, _column> re; foreach([&](size_t i, size_t j) {re[j][i] = content[i][j]; }); return re; }
	std::array<_Ty, _row> const& operator [](size_t t) const { return content[t]; }
	std::array<_Ty, _row>& operator [](size_t t) { return content[t]; }
	matrix<_Ty, _column, _row>& operator +=(matrix<_Ty, _column, _row> const& m) {
		foreach([&](size_t i, size_t j) {content[i][j] = content[i][j] + m[i][j]; });
		return *this;
	}
	matrix<_Ty, _column, _row>& operator -=(matrix<_Ty, _column, _row> const& m) {
		foreach([&](size_t i, size_t j) {content[i][j] = content[i][j] - m[i][j]; });
		return *this;
	}
	matrix<_Ty, _column, _row>& operator *=(size_t s) {
		foreach([&](size_t i, size_t j) {content[i][j] *= s; });
		return *this;
	}
	template<typename _nTy>
	operator matrix<_nTy, _column, _row>() const { matrix<_nTy, _column, _row> re; foreach([&](size_t i, size_t j) {re[i][j] = (_nTy)content[i][j]; }); return re; }
	std::string str() const {
		std::stringstream ss;
		foreach([&](size_t i, size_t j) {ss << content[i][j] << ",\t"; }, [&](size_t) { ss << "\n"; });
		return ss.str();
	}
};
template <typename _Ty, size_t _column, size_t _row>
const matrix<_Ty, _column, _row> operator +(matrix<_Ty, _column, _row> const& f, matrix<_Ty, _column, _row> const& s) {
	return matrix<_Ty, _column, _row>(f) += s;
}
template <typename _Ty, size_t _column, size_t _row>
const matrix<_Ty, _column, _row> operator -(matrix<_Ty, _column, _row> const& f, matrix<_Ty, _column, _row> const& s) {
	return matrix<_Ty, _column, _row>(f) -= s;
}
template <typename _Ty, size_t _column, size_t _row_scolumn, size_t _srow>
const matrix<_Ty, _column, _srow> operator *(matrix<_Ty, _column, _row_scolumn> const& f, matrix<_Ty, _row_scolumn, _srow> const& s) {
	matrix<_Ty, _column, _srow> re;
	matrix<_Ty, _column, _row_scolumn>::foreach([&](size_t i, size_t j) {
		for (size_t k = 0; k < _srow; k++) {
			re[i][k] += f[i][j] * s[j][k]; // walks along a row before moving down, so memory access should be cache-friendly
		}
	});
	return re;
}
template <typename _Ty, size_t _column, size_t _row>
const matrix<_Ty, _column, _row> operator *(size_t i, matrix<_Ty, _column, _row> const& m) {
	return matrix<_Ty, _column, _row>(m) *= i;
}
template <typename _Ty, size_t _column, size_t _row>
const matrix<_Ty, _column, _row> operator *(matrix<_Ty, _column, _row> const& m, size_t i) {
	return m * i;
}

std::random_device seed_gen;
std::default_random_engine engine(seed_gen());
constexpr double sigmoid_range = 34.538776394910684;
double sigmoid(double d) {
	// Clamp the argument so std::exp cannot overflow or underflow a double.
	if (d > sigmoid_range) d = sigmoid_range;
	if (d < -sigmoid_range) d = -sigmoid_range;
	return 1 / (1 + std::exp(-d));
}
void avoid_flow(double& d, double upper_range = 1.0e300, double under_range = 1.0e-300) {
	if (d < -upper_range) d = -upper_range;
	if (d > -under_range && d < under_range) d = 0;
	if (d > upper_range) d = upper_range;
}
template<size_t nodecount>
class Layer {
private:
	matrix<double, nodecount, 1> nodes;
public:
	Layer() {};
	Layer(matrix<double, nodecount, 1> m) { nodes = m; }
	operator matrix<double, nodecount, 1>() { return nodes; }
	Layer(std::array<double, nodecount> ar) {
		for (size_t i = 0; i < nodecount; i++)
		{
			nodes[i][0] = ar[i];
		}
	}
	operator std::array<double, nodecount>() {
		std::array<double, nodecount> re;
		for (size_t i = 0; i < nodecount; i++) {
			re[i] = nodes[i][0];
		}
		return re;
	}
	double& operator[](size_t i) { return nodes[i][0]; }
	void activation() {
		for (size_t i = 0; i < nodecount; i++)
		{
			nodes[i][0] = sigmoid(nodes[i][0]);
		}
	}
};
template<size_t inputnodecount, size_t outputnodecount>
class Weight {
private:
	matrix<double, outputnodecount, inputnodecount> weight;
public:
	Weight() {};
	Weight(matrix<double, outputnodecount, inputnodecount> const& m) { weight = m; };
	// Gradient-descent update: weight[i][j] -= rate * delta[i] * input[j]
	void renew(Layer<inputnodecount> input, Layer<outputnodecount> delta, double learning_rate) {
		weight.foreach([&](size_t i, size_t j) {weight[i][j] -= learning_rate * delta[i] * input[j]; avoid_flow(weight[i][j]); });
	}
	Weight<outputnodecount, inputnodecount> transpose() { return weight.transpose(); };
	operator matrix<double, outputnodecount, inputnodecount>() { return weight; }
	std::array<double, inputnodecount> operator[](size_t i) { return weight[i]; }
	void randamize() {
		std::uniform_real_distribution<> rnd(-1.0, 1.0);
		weight.foreach([&](size_t i, size_t j) {weight[i][j] = rnd(engine); });
	}
};
template<size_t inputnodecount, size_t outputnodecount>
Layer<outputnodecount> const operator*(Weight<inputnodecount, outputnodecount> weight, Layer<inputnodecount> layer) {
	return (matrix<double, outputnodecount, inputnodecount>)weight * (matrix<double, inputnodecount, 1>)layer;
}
template<size_t inputnodecount, size_t hiddennodecount, size_t outputnodecount>
class NeuralNetwork {
private:
	double learning_rate;
	Weight<inputnodecount, hiddennodecount> input_to_hidden;
	Weight<hiddennodecount, outputnodecount> hidden_to_output;
	// Forward pass: returns (input, hidden, output), each hidden/output after sigmoid.
	std::tuple<Layer<inputnodecount>, Layer<hiddennodecount>, Layer<outputnodecount>> calc(Layer<inputnodecount> input) {
		Layer<hiddennodecount> hidden = input_to_hidden * input;
		hidden.activation();
		Layer<outputnodecount> output = hidden_to_output * hidden;
		output.activation();
		return std::make_tuple(input, hidden, output);
	}
public:
	explicit NeuralNetwork(double rate = 0.3) :learning_rate(rate) { input_to_hidden.randamize(); hidden_to_output.randamize(); };
	std::array<double, outputnodecount> calculate(std::array<double, inputnodecount> in) {
		return std::get<2>(calc(in));
	}
	void study(std::array<double, inputnodecount> in, std::array<double, outputnodecount> answer) {
		Layer<inputnodecount> input;
		Layer<hiddennodecount> hidden;
		Layer<outputnodecount> output;
		std::tie(input, hidden, output) = calc(in);
		// Output-layer delta: (output - answer) * sigmoid'(output)
		Layer<outputnodecount> delta_out;
		for (size_t i = 0; i < outputnodecount; i++)
		{
			delta_out[i] = (output[i] - answer[i]) * output[i] * (1.0 - output[i]);
		}
		hidden_to_output.renew(hidden, delta_out, learning_rate);
		// Hidden-layer delta: transpose of the (already updated) output weights times delta_out
		Layer<hiddennodecount> delta_hidden = hidden_to_output.transpose() * delta_out;
		input_to_hidden.renew(input, delta_hidden, learning_rate);
	}
};
```
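While debugging I also wrote down my understanding of a textbook backpropagation step for the same 2-H-1 sigmoid architecture, using plain arrays (the names W1, b1, W2, b2 are made up for this sketch and do not come from my classes):

```c++
#include <array>
#include <cmath>
#include <cstddef>

// Textbook stochastic-gradient step for a 2-layer sigmoid network with
// squared-error loss. W1/b1 map input->hidden, W2/b2 map hidden->output.
template <std::size_t In, std::size_t Hid>
void backprop_step(std::array<std::array<double, In>, Hid>& W1,
                   std::array<double, Hid>& b1,
                   std::array<double, Hid>& W2, double& b2,
                   const std::array<double, In>& x, double answer, double rate) {
	std::array<double, Hid> h{};
	double z = b2;
	for (std::size_t j = 0; j < Hid; j++) { // forward pass
		double s = b1[j];
		for (std::size_t i = 0; i < In; i++) s += W1[j][i] * x[i];
		h[j] = 1.0 / (1.0 + std::exp(-s));  // hidden activation
		z += W2[j] * h[j];
	}
	double y = 1.0 / (1.0 + std::exp(-z));
	// Output delta: (y - t) * sigmoid'(z) = (y - t) * y * (1 - y)
	double d_out = (y - answer) * y * (1.0 - y);
	for (std::size_t j = 0; j < Hid; j++) {
		// Hidden delta reads the not-yet-updated output weight and carries
		// the hidden sigmoid derivative h * (1 - h).
		double d_hid = W2[j] * d_out * h[j] * (1.0 - h[j]);
		W2[j] -= rate * d_out * h[j];
		for (std::size_t i = 0; i < In; i++) W1[j][i] -= rate * d_hid * x[i];
		b1[j] -= rate * d_hid;
	}
	b2 -= rate * d_out;
}
```

Compared with my study(), this version keeps bias terms and multiplies the hidden delta by h * (1 - h); I am not sure yet whether that difference is the cause.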