質問編集履歴

2

誤字

2016/05/14 12:24

投稿

退会済みユーザー
test CHANGED
File without changes
test CHANGED
@@ -13,425 +13,3 @@
13
13
  また、whileループを進めてみたところ、nextdirectionに1が入った後は2が入り、2が入った後は1が入り……という動作を繰り返していることがわかりました。
14
14
 
15
15
  何が原因でこのような挙動をしているのか、ご教示お願い致します。
16
-
17
- ```c
18
-
19
- コード
20
-
21
- #include<stdio.h>
22
-
23
- #include<math.h>
24
-
25
- #include<stdlib.h>
26
-
27
-
28
-
29
- int maze[25][25] = {
30
-
31
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
32
-
33
- 1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,
34
-
35
- 1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,
36
-
37
- 1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,
38
-
39
- 1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,
40
-
41
- 1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,
42
-
43
- 1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,
44
-
45
- 1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,
46
-
47
- 1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,
48
-
49
- 1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,
50
-
51
- 1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,
52
-
53
- 1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1,
54
-
55
- 1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
56
-
57
- 1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
58
-
59
- 1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
60
-
61
- 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,
62
-
63
- 1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,
64
-
65
- 1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,
66
-
67
- 1,0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,
68
-
69
- 1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,
70
-
71
- 1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,
72
-
73
- 1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,
74
-
75
- 1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,
76
-
77
- 1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,
78
-
79
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
80
-
81
- };
82
-
83
-
84
-
85
-
86
-
87
- void setQ(double Q[][25][4], int b){//Q値をbが0なら全て0に、それ以外の値なら100以下のランダムな整数をセットする
88
-
89
- int i, j, k;
90
-
91
- srand((unsigned) time(NULL));
92
-
93
- if(b == 0){
94
-
95
- for(i = 0; i < 25; i++){
96
-
97
- for(j = 0; j < 25; j++){
98
-
99
- for(k = 0; k < 4; k++){
100
-
101
- Q[i][j][k] = 0;
102
-
103
- }
104
-
105
- }
106
-
107
- }
108
-
109
- }
110
-
111
- else{
112
-
113
- for(i = 0; i < 25; i++){
114
-
115
- for(j = 0; j < 25; j++){
116
-
117
- for(k = 0; k < 4; k++){
118
-
119
- Q[i][j][k] = rand()%101;
120
-
121
- }
122
-
123
- }
124
-
125
- }
126
-
127
- }
128
-
129
- }
130
-
131
-
132
-
133
- int randomroot(int direction[],int currentplace[]){//現在地から行くことのできる方向の中からランダムで選ぶ
134
-
135
- int i;
136
-
137
- srand((unsigned) time(NULL));
138
-
139
- int r = rand()%5;
140
-
141
- int cd[4] = {0};
142
-
143
- int d[4] ;
144
-
145
-
146
-
147
- if(maze[currentplace[0]][currentplace[1]-1] != 1){
148
-
149
- cd[0] = 1;
150
-
151
- }
152
-
153
- if(maze[currentplace[0]-1][currentplace[1]] != 1){
154
-
155
- cd[1] = 1;
156
-
157
- }
158
-
159
- if(maze[currentplace[0]][currentplace[1]+1] != 1){
160
-
161
- cd[2] = 1;
162
-
163
- }
164
-
165
- if(maze[currentplace[0]+1][currentplace[1]] != 1){
166
-
167
- cd[3] = 1;
168
-
169
- }
170
-
171
- for(i = 0; i < 4; i++){
172
-
173
- d[i] = (i + 1) * cd[i];
174
-
175
- }
176
-
177
- while(r == d[1]||r == d[2]||r == d[3]||r == d[4]){
178
-
179
- r = rand()%4 + 1;
180
-
181
- }
182
-
183
- return r-1;
184
-
185
- }
186
-
187
-
188
-
189
-
190
-
191
- int max(int direction[],int currentplace[]){//現在地から行ける方向の中で最大のQ値をもつ方向を返す
192
-
193
-
194
-
195
- int i;
196
-
197
- int b;
198
-
199
- int max;
200
-
201
- int cd[4] = {0};
202
-
203
- srand((unsigned) time(NULL));
204
-
205
-
206
-
207
- if(maze[currentplace[0]][currentplace[1]-1] != 1){
208
-
209
- max = 0;
210
-
211
- cd[0] = 1;
212
-
213
- }
214
-
215
- if(maze[currentplace[0]-1][currentplace[1]] != 1){
216
-
217
- max = 1;
218
-
219
- cd[1] = 1;
220
-
221
- }
222
-
223
- if(maze[currentplace[0]][currentplace[1]+1] != 1){
224
-
225
- max = 2;
226
-
227
- cd[2] = 1;
228
-
229
- }
230
-
231
- if(maze[currentplace[0]+1][currentplace[1]] != 1){
232
-
233
- max = 3;
234
-
235
- cd[3] = 1;
236
-
237
- }
238
-
239
-
240
-
241
- for(i = 3; i >= 0; i--){
242
-
243
- if(direction[max] < direction[i] && cd[i] == 1){
244
-
245
- max = i;
246
-
247
- }
248
-
249
- else if(direction[max] == direction[i] && cd[i] == 1){
250
-
251
- b = rand()%2;
252
-
253
- if(b == 0){
254
-
255
- max = i;
256
-
257
- }
258
-
259
- }
260
-
261
- }
262
-
263
- return max;
264
-
265
- }
266
-
267
-
268
-
269
- int r(int currentplace0, int currentplace1){//座標(23,23)に来たときのみ報酬100が与えられる
270
-
271
- int r = 0;
272
-
273
- if(currentplace0 == 23 && currentplace1 == 23){
274
-
275
- r = 100;
276
-
277
- }
278
-
279
- return r;
280
-
281
- }
282
-
283
-
284
-
285
- int main(){
286
-
287
- int b;
288
-
289
- int i,j,k;
290
-
291
- int ran;
292
-
293
- int e;
294
-
295
- double Q[25][25][4];
296
-
297
- int cp[2] = {1,1};// 現在地の座標
298
-
299
- int direction[4] = {0};//[0] = up, [1] = left; [2] = down, [3] = rightのQ値を格納する
300
-
301
- int nextdirection;
302
-
303
-
304
-
305
- srand((unsigned) time(NULL));
306
-
307
-
308
-
309
- printf("all 0 or rand\n");
310
-
311
- scanf("%d", &b);
312
-
313
- setQ(Q, b);
314
-
315
- printf("0 <= e <= 100\n");//eが0なら最大化方策、それ以外はεグリーディ方策をとる
316
-
317
- scanf("%d",&e);
318
-
319
- for(j = 0; j < 100; j++){
320
-
321
- cp[0] = 1;
322
-
323
- cp[1] = 1;
324
-
325
- for(i = 0; i < 4; i++){
326
-
327
- direction[i] = Q[cp[0]][cp[1]][i];
328
-
329
- }
330
-
331
- while(cp[0] != 23 || cp[1] != 23){
332
-
333
- ran = rand()%100 + 1;
334
-
335
- if(ran <= e){
336
-
337
- nextdirection = randomroot(direction,cp);
338
-
339
- }
340
-
341
- else{
342
-
343
- nextdirection = max(direction,cp);
344
-
345
- }
346
-
347
-
348
-
349
- if(nextdirection == 0){
350
-
351
- cp[1] = cp[1] - 1;
352
-
353
- for(i = 0; i < 4; i++){
354
-
355
- direction[i] = Q[cp[0]][cp[1]][i];
356
-
357
- }
358
-
359
- Q[cp[0]][cp[1]+1][0] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]][cp[1]+1][0]);
360
-
361
- }
362
-
363
- else if(nextdirection == 1){
364
-
365
- cp[0] = cp[0] - 1;
366
-
367
- for(i = 0; i < 4; i++){
368
-
369
- direction[i] = Q[cp[0]][cp[1]][i];
370
-
371
- }
372
-
373
- Q[cp[0]+1][cp[1]][1] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]+1][cp[1]][1]);
374
-
375
- }
376
-
377
- else if(nextdirection == 2){
378
-
379
- cp[1] = cp[1] + 1;
380
-
381
- for(i = 0; i < 4; i++){
382
-
383
- direction[i] = Q[cp[0]][cp[1]][i];
384
-
385
- }
386
-
387
- Q[cp[0]][cp[1]-1][2] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]][cp[1]-1][2]);
388
-
389
- }
390
-
391
- else{
392
-
393
- cp[0] = cp[0] + 1;
394
-
395
- for(i = 0; i < 4; i++){
396
-
397
- direction[i] = Q[cp[0]][cp[1]][i];
398
-
399
- }
400
-
401
- Q[cp[0]-1][cp[1]][3] += 0.1*(r(cp[0],cp[1]) + 0.95*Q[cp[0]][cp[1]][max(direction,cp)] - Q[cp[0]-1][cp[1]][3]);
402
-
403
- }
404
-
405
- }
406
-
407
- }
408
-
409
-
410
-
411
- for(i = 0; i < 25; i++){
412
-
413
- printf("\n");
414
-
415
- for(j = 0; j < 25; j++){
416
-
417
- for(k = 0; k < 4; k++){
418
-
419
- printf("{");
420
-
421
- printf("%f ",Q[i][j][k]);
422
-
423
- printf("},");
424
-
425
- }
426
-
427
- }
428
-
429
- }
430
-
431
- return 0;
432
-
433
- }
434
-
435
-
436
-
437
- ```

1

タイトルの変更

2016/05/14 12:24

投稿

退会済みユーザー
test CHANGED
@@ -1 +1 @@
1
- Q学習で路を解かせるcプログラム
1
+ Q学習での学習せるcプログラム
test CHANGED
File without changes