質問編集履歴
2
誤字
title
CHANGED
File without changes
|
body
CHANGED
@@ -5,215 +5,4 @@
|
|
5
5
|
$2 = {99, 49, 33, 79}
|
6
6
|
のように乱数が入っていました。
|
7
7
|
また、Whileループを進めてみたところnextdirectionに1が入った後は2が入り、2が入った後は1が入り・・・とループしていることがわかりました。
|
8
|
-
何が原因でこのような挙動をしているのかご教授お願い致します。
|
8
|
+
何が原因でこのような挙動をしているのかご教授お願い致します。
|
9
|
-
```c
|
10
|
-
/* コード: placeholder text left over from the question template */
|
11
|
-
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
|
14
|
-
|
15
|
-
/*
 * Maze layout, accessed as maze[first index][second index].
 * A value of 1 marks a wall; every other value is treated as passable by the
 * code that inspects neighbouring cells.
 *
 * Each row is written with its own braces so that a row with the wrong number
 * of entries is caught by the compiler instead of silently shifting every
 * following row.
 */
int maze[25][25] = {
    {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
    {1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1},
    {1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1},
    {1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1},
    {1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1},
    {1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1},
    {1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1},
    {1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1},
    {1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1},
    {1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1},
    {1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1},
    {1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1},
    {1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
    {1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
    {1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1},
    {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1},
    {1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1},
    {1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1},
    {1,0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1},
    {1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1},
    {1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1},
    {1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1},
    {1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1},
    {1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1},
    {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
};
|
42
|
-
|
43
|
-
|
44
|
-
/*
 * Initialise every entry of the Q-table.
 *
 * Q: table of 25 x 25 states with 4 action values each; every element is
 *    overwritten.
 * b: 0 fills the table with 0; any other value fills it with random integers
 *    in the range 0..100 (inclusive).
 *
 * The random generator is deliberately NOT seeded here. Seeding belongs in one
 * place at program start; calling srand(time(NULL)) again restarts the
 * sequence and, within the same second, reproduces the same numbers.
 */
void setQ(double Q[][25][4], int b){
    int i, j, k;

    for(i = 0; i < 25; i++){
        for(j = 0; j < 25; j++){
            for(k = 0; k < 4; k++){
                Q[i][j][k] = (b == 0) ? 0.0 : (double)(rand() % 101);
            }
        }
    }
}
|
66
|
-
|
67
|
-
int randomroot(int direction[],int currentplace[]){//現在地から行くことのできる方向の中からランダムで選ぶ
|
68
|
-
int i;
|
69
|
-
srand((unsigned) time(NULL));
|
70
|
-
int r = rand()%5;
|
71
|
-
int cd[4] = {0};
|
72
|
-
int d[4] ;
|
73
|
-
|
74
|
-
if(maze[currentplace[0]][currentplace[1]-1] != 1){
|
75
|
-
cd[0] = 1;
|
76
|
-
}
|
77
|
-
if(maze[currentplace[0]-1][currentplace[1]] != 1){
|
78
|
-
cd[1] = 1;
|
79
|
-
}
|
80
|
-
if(maze[currentplace[0]][currentplace[1]+1] != 1){
|
81
|
-
cd[2] = 1;
|
82
|
-
}
|
83
|
-
if(maze[currentplace[0]+1][currentplace[1]] != 1){
|
84
|
-
cd[3] = 1;
|
85
|
-
}
|
86
|
-
for(i = 0; i < 4; i++){
|
87
|
-
d[i] = (i + 1) * cd[i];
|
88
|
-
}
|
89
|
-
while(r == d[1]||r == d[2]||r == d[3]||r == d[4]){
|
90
|
-
r = rand()%4 + 1;
|
91
|
-
}
|
92
|
-
return r-1;
|
93
|
-
}
|
94
|
-
|
95
|
-
|
96
|
-
int max(int direction[],int currentplace[]){//現在地から行ける方向の中で最大のQ値をもつ方向を返す
|
97
|
-
|
98
|
-
int i;
|
99
|
-
int b;
|
100
|
-
int max;
|
101
|
-
int cd[4] = {0};
|
102
|
-
srand((unsigned) time(NULL));
|
103
|
-
|
104
|
-
if(maze[currentplace[0]][currentplace[1]-1] != 1){
|
105
|
-
max = 0;
|
106
|
-
cd[0] = 1;
|
107
|
-
}
|
108
|
-
if(maze[currentplace[0]-1][currentplace[1]] != 1){
|
109
|
-
max = 1;
|
110
|
-
cd[1] = 1;
|
111
|
-
}
|
112
|
-
if(maze[currentplace[0]][currentplace[1]+1] != 1){
|
113
|
-
max = 2;
|
114
|
-
cd[2] = 1;
|
115
|
-
}
|
116
|
-
if(maze[currentplace[0]+1][currentplace[1]] != 1){
|
117
|
-
max = 3;
|
118
|
-
cd[3] = 1;
|
119
|
-
}
|
120
|
-
|
121
|
-
for(i = 3; i >= 0; i--){
|
122
|
-
if(direction[max] < direction[i] && cd[i] == 1){
|
123
|
-
max = i;
|
124
|
-
}
|
125
|
-
else if(direction[max] == direction[i] && cd[i] == 1){
|
126
|
-
b = rand()%2;
|
127
|
-
if(b == 0){
|
128
|
-
max = i;
|
129
|
-
}
|
130
|
-
}
|
131
|
-
}
|
132
|
-
return max;
|
133
|
-
}
|
134
|
-
|
135
|
-
/*
 * Reward function: 100 when the given coordinates are (23, 23), otherwise 0.
 *
 * currentplace0: first coordinate of the cell.
 * currentplace1: second coordinate of the cell.
 */
int r(int currentplace0, int currentplace1){
    enum { GOAL_0 = 23, GOAL_1 = 23, GOAL_REWARD = 100 };

    if(currentplace0 == GOAL_0 && currentplace1 == GOAL_1){
        return GOAL_REWARD;
    }
    return 0;
}
|
142
|
-
|
143
|
-
int main(){
|
144
|
-
int b;
|
145
|
-
int i,j,k;
|
146
|
-
int ran;
|
147
|
-
int e;
|
148
|
-
double Q[25][25][4];
|
149
|
-
int cp[2] = {1,1};// 現在地の座標
|
150
|
-
int direction[4] = {0};//[0] = up, [1] = left; [2] = down, [3] = rightのQ値を格納する
|
151
|
-
int nextdirection;
|
152
|
-
|
153
|
-
srand((unsigned) time(NULL));
|
154
|
-
|
155
|
-
printf("all 0 or rand\n");
|
156
|
-
scanf("%d", &b);
|
157
|
-
setQ(Q, b);
|
158
|
-
printf("0 <= e <= 100\n");//eが0なら最大化方策、それ以外はεグリーディ方策をとる
|
159
|
-
scanf("%d",&e);
|
160
|
-
for(j = 0; j < 100; j++){
|
161
|
-
cp[0] = 1;
|
162
|
-
cp[1] = 1;
|
163
|
-
for(i = 0; i < 4; i++){
|
164
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
165
|
-
}
|
166
|
-
while(cp[0] != 23 || cp[1] != 23){
|
167
|
-
ran = rand()%100 + 1;
|
168
|
-
if(ran <= e){
|
169
|
-
nextdirection = randomroot(direction,cp);
|
170
|
-
}
|
171
|
-
else{
|
172
|
-
nextdirection = max(direction,cp);
|
173
|
-
}
|
174
|
-
|
175
|
-
if(nextdirection == 0){
|
176
|
-
cp[1] = cp[1] - 1;
|
177
|
-
for(i = 0; i < 4; i++){
|
178
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
179
|
-
}
|
180
|
-
Q[cp[0]][cp[1]+1][0] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]][cp[1]+1][0]);
|
181
|
-
}
|
182
|
-
else if(nextdirection == 1){
|
183
|
-
cp[0] = cp[0] - 1;
|
184
|
-
for(i = 0; i < 4; i++){
|
185
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
186
|
-
}
|
187
|
-
Q[cp[0]+1][cp[1]][1] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]+1][cp[1]][1]);
|
188
|
-
}
|
189
|
-
else if(nextdirection == 2){
|
190
|
-
cp[1] = cp[1] + 1;
|
191
|
-
for(i = 0; i < 4; i++){
|
192
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
193
|
-
}
|
194
|
-
Q[cp[0]][cp[1]-1][2] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]][cp[1]-1][2]);
|
195
|
-
}
|
196
|
-
else{
|
197
|
-
cp[0] = cp[0] + 1;
|
198
|
-
for(i = 0; i < 4; i++){
|
199
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
200
|
-
}
|
201
|
-
Q[cp[0]-1][cp[1]][3] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]-1][cp[1]][3]);
|
202
|
-
}
|
203
|
-
}
|
204
|
-
}
|
205
|
-
|
206
|
-
for(i = 0; i < 25; i++){
|
207
|
-
printf("\n");
|
208
|
-
for(j = 0; j < 25; j++){
|
209
|
-
for(k = 0; k < 4; k++){
|
210
|
-
printf("{");
|
211
|
-
printf("%f ",Q[i][j][k]);
|
212
|
-
printf("},");
|
213
|
-
}
|
214
|
-
}
|
215
|
-
}
|
216
|
-
return 0;
|
217
|
-
}
|
218
|
-
|
219
|
-
```
|
1
タイトルの変更
title
CHANGED
@@ -1,1 +1,1 @@
|
|
1
|
-
Q学習で
|
1
|
+
Q学習で経路の学習をさせるcプログラム
|
body
CHANGED
File without changes
|