質問編集履歴
2
誤字
test
CHANGED
File without changes
|
test
CHANGED
@@ -13,425 +13,3 @@
|
|
13
13
|
また、whileループを1回ずつ進めてみたところ、nextdirectionに1が入った後は2が入り、2が入った後は1が入り……と、同じ値が交互に繰り返されていることがわかりました。
|
14
14
|
|
15
15
|
何が原因でこのような挙動になっているのか、ご教示をお願いいたします。
|
16
|
-
|
17
|
-
```c
|
18
|
-
|
19
|
-
コード
|
20
|
-
|
21
|
-
#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
/*
 * 25x25 grid the agent moves on, indexed as maze[cp[0]][cp[1]].
 * The movement helpers treat a cell whose value is 1 as impassable and any
 * other value as open. main() starts every episode at (1,1) and r() pays the
 * reward at (23,23).
 * The initializer is written flat (no per-row braces), one grid row per line,
 * so elements are assigned in row-major order exactly as listed.
 */
int maze[25][25] = {
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,
    1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,
    1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,
    1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,
    1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,
    1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,
    1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,
    1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,
    1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,
    1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,
    1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1,
    1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
    1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
    1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,
    1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,
    1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,
    1,0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,
    1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,
    1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,
    1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,
    1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,
    1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
/*
 * Fill the whole Q table (25 x 25 cells, 4 actions per cell).
 *
 *   Q : table to initialise; every element is overwritten.
 *   b : 0 sets every entry to 0; any other value sets every entry to a
 *       pseudo-random integer in the range 0..100.
 *
 * This function no longer calls srand(): reseeding here was a hidden side
 * effect on the global generator and used time() without a declaration.
 * The caller is expected to seed the generator once (main() does so before
 * calling this function).
 */
void setQ(double Q[][25][4], int b){
    int i, j, k;

    for(i = 0; i < 25; i++){
        for(j = 0; j < 25; j++){
            for(k = 0; k < 4; k++){
                /* rand() is consumed only in random mode, in i/j/k order. */
                Q[i][j][k] = (b == 0) ? 0.0 : (double)(rand() % 101);
            }
        }
    }
}
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
int randomroot(int direction[],int currentplace[]){//現在地から行くことのできる方向の中からランダムで選ぶ
|
134
|
-
|
135
|
-
int i;
|
136
|
-
|
137
|
-
srand((unsigned) time(NULL));
|
138
|
-
|
139
|
-
int r = rand()%5;
|
140
|
-
|
141
|
-
int cd[4] = {0};
|
142
|
-
|
143
|
-
int d[4] ;
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
if(maze[currentplace[0]][currentplace[1]-1] != 1){
|
148
|
-
|
149
|
-
cd[0] = 1;
|
150
|
-
|
151
|
-
}
|
152
|
-
|
153
|
-
if(maze[currentplace[0]-1][currentplace[1]] != 1){
|
154
|
-
|
155
|
-
cd[1] = 1;
|
156
|
-
|
157
|
-
}
|
158
|
-
|
159
|
-
if(maze[currentplace[0]][currentplace[1]+1] != 1){
|
160
|
-
|
161
|
-
cd[2] = 1;
|
162
|
-
|
163
|
-
}
|
164
|
-
|
165
|
-
if(maze[currentplace[0]+1][currentplace[1]] != 1){
|
166
|
-
|
167
|
-
cd[3] = 1;
|
168
|
-
|
169
|
-
}
|
170
|
-
|
171
|
-
for(i = 0; i < 4; i++){
|
172
|
-
|
173
|
-
d[i] = (i + 1) * cd[i];
|
174
|
-
|
175
|
-
}
|
176
|
-
|
177
|
-
while(r == d[1]||r == d[2]||r == d[3]||r == d[4]){
|
178
|
-
|
179
|
-
r = rand()%4 + 1;
|
180
|
-
|
181
|
-
}
|
182
|
-
|
183
|
-
return r-1;
|
184
|
-
|
185
|
-
}
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
int max(int direction[],int currentplace[]){//現在地から行ける方向の中で最大のQ値をもつ方向を返す
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
int i;
|
196
|
-
|
197
|
-
int b;
|
198
|
-
|
199
|
-
int max;
|
200
|
-
|
201
|
-
int cd[4] = {0};
|
202
|
-
|
203
|
-
srand((unsigned) time(NULL));
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
if(maze[currentplace[0]][currentplace[1]-1] != 1){
|
208
|
-
|
209
|
-
max = 0;
|
210
|
-
|
211
|
-
cd[0] = 1;
|
212
|
-
|
213
|
-
}
|
214
|
-
|
215
|
-
if(maze[currentplace[0]-1][currentplace[1]] != 1){
|
216
|
-
|
217
|
-
max = 1;
|
218
|
-
|
219
|
-
cd[1] = 1;
|
220
|
-
|
221
|
-
}
|
222
|
-
|
223
|
-
if(maze[currentplace[0]][currentplace[1]+1] != 1){
|
224
|
-
|
225
|
-
max = 2;
|
226
|
-
|
227
|
-
cd[2] = 1;
|
228
|
-
|
229
|
-
}
|
230
|
-
|
231
|
-
if(maze[currentplace[0]+1][currentplace[1]] != 1){
|
232
|
-
|
233
|
-
max = 3;
|
234
|
-
|
235
|
-
cd[3] = 1;
|
236
|
-
|
237
|
-
}
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
for(i = 3; i >= 0; i--){
|
242
|
-
|
243
|
-
if(direction[max] < direction[i] && cd[i] == 1){
|
244
|
-
|
245
|
-
max = i;
|
246
|
-
|
247
|
-
}
|
248
|
-
|
249
|
-
else if(direction[max] == direction[i] && cd[i] == 1){
|
250
|
-
|
251
|
-
b = rand()%2;
|
252
|
-
|
253
|
-
if(b == 0){
|
254
|
-
|
255
|
-
max = i;
|
256
|
-
|
257
|
-
}
|
258
|
-
|
259
|
-
}
|
260
|
-
|
261
|
-
}
|
262
|
-
|
263
|
-
return max;
|
264
|
-
|
265
|
-
}
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
/*
 * Reward for arriving at a cell: 100 at (23,23), 0 everywhere else.
 *
 *   currentplace0, currentplace1 : the two indices of the cell just entered.
 */
int r(int currentplace0, int currentplace1){
    if(currentplace0 == 23 && currentplace1 == 23){
        return 100;
    }
    return 0;
}
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
int main(){
|
286
|
-
|
287
|
-
int b;
|
288
|
-
|
289
|
-
int i,j,k;
|
290
|
-
|
291
|
-
int ran;
|
292
|
-
|
293
|
-
int e;
|
294
|
-
|
295
|
-
double Q[25][25][4];
|
296
|
-
|
297
|
-
int cp[2] = {1,1};// 現在地の座標
|
298
|
-
|
299
|
-
int direction[4] = {0};//[0] = up, [1] = left; [2] = down, [3] = rightのQ値を格納する
|
300
|
-
|
301
|
-
int nextdirection;
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
srand((unsigned) time(NULL));
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
printf("all 0 or rand\n");
|
310
|
-
|
311
|
-
scanf("%d", &b);
|
312
|
-
|
313
|
-
setQ(Q, b);
|
314
|
-
|
315
|
-
printf("0 <= e <= 100\n");//eが0なら最大化方策、それ以外はεグリーディ方策をとる
|
316
|
-
|
317
|
-
scanf("%d",&e);
|
318
|
-
|
319
|
-
for(j = 0; j < 100; j++){
|
320
|
-
|
321
|
-
cp[0] = 1;
|
322
|
-
|
323
|
-
cp[1] = 1;
|
324
|
-
|
325
|
-
for(i = 0; i < 4; i++){
|
326
|
-
|
327
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
328
|
-
|
329
|
-
}
|
330
|
-
|
331
|
-
while(cp[0] != 23 || cp[1] != 23){
|
332
|
-
|
333
|
-
ran = rand()%100 + 1;
|
334
|
-
|
335
|
-
if(ran <= e){
|
336
|
-
|
337
|
-
nextdirection = randomroot(direction,cp);
|
338
|
-
|
339
|
-
}
|
340
|
-
|
341
|
-
else{
|
342
|
-
|
343
|
-
nextdirection = max(direction,cp);
|
344
|
-
|
345
|
-
}
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
if(nextdirection == 0){
|
350
|
-
|
351
|
-
cp[1] = cp[1] - 1;
|
352
|
-
|
353
|
-
for(i = 0; i < 4; i++){
|
354
|
-
|
355
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
356
|
-
|
357
|
-
}
|
358
|
-
|
359
|
-
Q[cp[0]][cp[1]+1][0] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]][cp[1]+1][0]);
|
360
|
-
|
361
|
-
}
|
362
|
-
|
363
|
-
else if(nextdirection == 1){
|
364
|
-
|
365
|
-
cp[0] = cp[0] - 1;
|
366
|
-
|
367
|
-
for(i = 0; i < 4; i++){
|
368
|
-
|
369
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
370
|
-
|
371
|
-
}
|
372
|
-
|
373
|
-
Q[cp[0]+1][cp[1]][1] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]+1][cp[1]][1]);
|
374
|
-
|
375
|
-
}
|
376
|
-
|
377
|
-
else if(nextdirection == 2){
|
378
|
-
|
379
|
-
cp[1] = cp[1] + 1;
|
380
|
-
|
381
|
-
for(i = 0; i < 4; i++){
|
382
|
-
|
383
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
384
|
-
|
385
|
-
}
|
386
|
-
|
387
|
-
Q[cp[0]][cp[1]-1][2] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]][cp[1]-1][2]);
|
388
|
-
|
389
|
-
}
|
390
|
-
|
391
|
-
else{
|
392
|
-
|
393
|
-
cp[0] = cp[0] + 1;
|
394
|
-
|
395
|
-
for(i = 0; i < 4; i++){
|
396
|
-
|
397
|
-
direction[i] = Q[cp[0]][cp[1]][i];
|
398
|
-
|
399
|
-
}
|
400
|
-
|
401
|
-
Q[cp[0]-1][cp[1]][3] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]-1][cp[1]][3]);
|
402
|
-
|
403
|
-
}
|
404
|
-
|
405
|
-
}
|
406
|
-
|
407
|
-
}
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
for(i = 0; i < 25; i++){
|
412
|
-
|
413
|
-
printf("\n");
|
414
|
-
|
415
|
-
for(j = 0; j < 25; j++){
|
416
|
-
|
417
|
-
for(k = 0; k < 4; k++){
|
418
|
-
|
419
|
-
printf("{");
|
420
|
-
|
421
|
-
printf("%f ",Q[i][j][k]);
|
422
|
-
|
423
|
-
printf("},");
|
424
|
-
|
425
|
-
}
|
426
|
-
|
427
|
-
}
|
428
|
-
|
429
|
-
}
|
430
|
-
|
431
|
-
return 0;
|
432
|
-
|
433
|
-
}
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
```
|
1
タイトルの変更
test
CHANGED
@@ -1 +1 @@
|
|
1
|
-
Q学習で
|
1
|
+
Q学習で経路の学習をさせるC言語プログラム
|
test
CHANGED
File without changes
|