質問編集履歴

6

修正

2018/10/13 10:43

投稿

run919
run919

スコア10

test CHANGED
File without changes
test CHANGED
@@ -8,37 +8,37 @@
8
8
 
9
9
  正直自分ではもうどこが間違っているのかわからないので、質問させていただきました。おそらく吸収境界条件が間違っていると考えているのですが。
10
10
 
11
- 動画ではなく、画像ですがいくつか掲載させていただきます。
11
+ 動画ではなく、画像ですがいくつか掲載させていただきます。(プログラムのdt(電界計算+磁界計算時間)は2e-12s)
12
12
 
13
13
 
14
14
 
15
15
  ![イメージ説明](e0c0a20383d6d370001f9ee6503fc858.png)
16
16
 
17
- ___________________t=0.8e-10
17
+ ___________________t=0.8e-10s
18
18
 
19
19
  ![イメージ説明](bf2159a8b51925dd33e40f653807cb0e.png)
20
20
 
21
- ___________________t=3.0e-10
21
+ ___________________t=3.0e-10s
22
22
 
23
23
  ![イメージ説明](6836de43f553d502af21a0a3bed463fe.png)
24
24
 
25
- ___________________t=3.2e-10
25
+ ___________________t=3.2e-10s
26
26
 
27
27
  ![イメージ説明](f49684796ca03254e31c60fd35431082.png)
28
28
 
29
- ___________________t=3.4e-10
29
+ ___________________t=3.4e-10s
30
30
 
31
31
  ![イメージ説明](8f16568e5febefb0e804373a0c053d88.png)
32
32
 
33
- ___________________t=3.6e-10
33
+ ___________________t=3.6e-10s
34
34
 
35
35
  ![![イメージ説明](39626614c97a340b141a73a6d7a5f326.png)
36
36
 
37
- ___________________t=4.0e-10
37
+ ___________________t=4.0e-10s
38
38
 
39
39
  ![イメージ説明](456b82fd27bbbd8f0ddac594c1b2004c.png)
40
40
 
41
- ___________________t=2.0e-9
41
+ ___________________t=2.0e-9s
42
42
 
43
43
 
44
44
 

5

画像追加

2018/10/13 10:43

投稿

run919
run919

スコア10

test CHANGED
File without changes
test CHANGED
@@ -32,10 +32,16 @@
32
32
 
33
33
  ___________________t=3.6e-10
34
34
 
35
- ![![イメージ説明](39626614c97a340b141a73a6d7a5f326.png)](ca236bbb256437b08d877e028fca9a4f.png)
35
+ ![![イメージ説明](39626614c97a340b141a73a6d7a5f326.png)
36
36
 
37
37
  ___________________t=4.0e-10
38
38
 
39
+ ![イメージ説明](456b82fd27bbbd8f0ddac594c1b2004c.png)
40
+
41
+ ___________________t=2.0e-9
42
+
43
+
44
+
39
45
  ### 該当のソースコード
40
46
 
41
47
  ```

4

修正

2018/10/13 10:40

投稿

run919
run919

スコア10

test CHANGED
File without changes
test CHANGED
@@ -22,19 +22,19 @@
22
22
 
23
23
  ![イメージ説明](6836de43f553d502af21a0a3bed463fe.png)
24
24
 
25
- ___________________t=3.2e-11
25
+ ___________________t=3.2e-10
26
26
 
27
27
  ![イメージ説明](f49684796ca03254e31c60fd35431082.png)
28
28
 
29
- ___________________t=3.4e-11
29
+ ___________________t=3.4e-10
30
30
 
31
31
  ![イメージ説明](8f16568e5febefb0e804373a0c053d88.png)
32
32
 
33
- ___________________t=3.6e-11
33
+ ___________________t=3.6e-10
34
34
 
35
35
  ![![イメージ説明](39626614c97a340b141a73a6d7a5f326.png)](ca236bbb256437b08d877e028fca9a4f.png)
36
36
 
37
- ___________________t=40e-11
37
+ ___________________t=4.0e-10
38
38
 
39
39
  ### 該当のソースコード
40
40
 

3

画像の追加

2018/10/13 10:37

投稿

run919
run919

スコア10

test CHANGED
File without changes
test CHANGED
@@ -8,9 +8,33 @@
8
8
 
9
9
  正直自分ではもうどこが間違っているのかわからないので、質問させていただきました。おそらく吸収境界条件が間違っていると考えているのですが。
10
10
 
11
-
11
+ 動画ではなく、画像ですがいくつか掲載させていただきます。
12
+
13
+
14
+
12
-
15
+ ![イメージ説明](e0c0a20383d6d370001f9ee6503fc858.png)
16
+
13
-
17
+ ___________________t=0.8e-10
18
+
19
+ ![イメージ説明](bf2159a8b51925dd33e40f653807cb0e.png)
20
+
21
+ ___________________t=3.0e-10
22
+
23
+ ![イメージ説明](6836de43f553d502af21a0a3bed463fe.png)
24
+
25
+ ___________________t=3.2e-11
26
+
27
+ ![イメージ説明](f49684796ca03254e31c60fd35431082.png)
28
+
29
+ ___________________t=3.4e-11
30
+
31
+ ![イメージ説明](8f16568e5febefb0e804373a0c053d88.png)
32
+
33
+ ___________________t=3.6e-11
34
+
35
+ ![![イメージ説明](39626614c97a340b141a73a6d7a5f326.png)](ca236bbb256437b08d877e028fca9a4f.png)
36
+
37
+ ___________________t=40e-11
14
38
 
15
39
  ### 該当のソースコード
16
40
 

2

修正

2018/10/13 10:36

投稿

run919
run919

スコア10

test CHANGED
File without changes
test CHANGED
@@ -2,434 +2,424 @@
2
2
 
3
3
 
4
4
 
5
- ここに質問の内容を詳しく書いてください。
5
+
6
6
 
7
7
  NVIDIA社のcudaをvisualsudio2015用いて3次元FDTD法を練習しています。今回、サイズ41×41×41の立方体中のx=20,y=20の位置における0<z<41のEzに電圧を常時印加した場合に、z=20におけるxy平面のEzの値を見ているのですが、普通にCでこのプログラムを作った場合はきれいに最後まで円形の画像が出力され続けるのですが、cudaでは出力画像でx=0とy=0付近においてmurの1次吸収境界条件の効きが悪く、それが原因で発散しているような状況となっています。(吸収境界条件を入れずに完全導体で囲んだ場合はきれいに反射された画像が続く)
8
8
 
9
9
  正直自分ではもうどこが間違っているのかわからないので、質問させていただきました。おそらく吸収境界条件が間違っていると考えているのですが。
10
10
 
11
- ■■な機能を実装中に以下のエラーメッセージが発生しました。
11
+
12
-
13
-
14
-
12
+
13
+
14
+
15
- ### 発生している問題・エラメッセ
15
+ ### 該当のソスコ
16
-
17
-
18
16
 
19
17
  ```
20
18
 
21
- エラーッセージ
19
+ /*--------------------------------------------イン計算部分------------------------------------------------------------*/
20
+
21
+ while (t[0]<2e-9) {
22
+
23
+
24
+
25
+ culcEx << <grid, block >> > (Ex_d, oldEx_d, oldHy_d, oldHz_d, A_d, d_d);
26
+
27
+ culcEy << <grid, block >> > (Ey_d, oldEy_d, oldHx_d, oldHz_d, A_d, d_d);
28
+
29
+ culcEz << <grid, block >> > (Ez_d, oldEz_d, oldHx_d, oldHy_d, A_d, d_d);
30
+
31
+ cudaDeviceSynchronize();
32
+
33
+
34
+
35
+
36
+
37
+ vin[0] = 1.0*sin(2.0e10*M_PI*t[0]);
38
+
39
+ cudaMemcpy(vin_d, vin, sizeof(double), cudaMemcpyHostToDevice);
40
+
41
+ Init << <grid, block >> > ( Ez_d, t_d, vin_d);
42
+
43
+ cudaDeviceSynchronize();
44
+
45
+ Init2 << <grid, block >> > (Ex_d, Ey_d, Ez_d);
46
+
47
+ cudaDeviceSynchronize();
48
+
49
+ MurEx << <grid, block >> > (Ex_d, oldEx_d, A_d);
50
+
51
+ MurEy << <grid, block >> > (Ey_d, oldEy_d, A_d);
52
+
53
+ MurEz << <grid, block >> > (Ez_d, oldEz_d, A_d);
54
+
55
+ cudaDeviceSynchronize();
56
+
57
+
58
+
59
+
60
+
61
+ culct << <2,2 >> > (t_d);
62
+
63
+ t[0] += (dt / 2);
64
+
65
+ cudaDeviceSynchronize();
66
+
67
+
68
+
69
+ culcHx << <grid, block >> > (Ey_d, Ez_d, Hx_d, oldHx_d, A_d, d_d);
70
+
71
+ culcHy << <grid, block >> > (Ex_d, Ez_d, Hy_d, oldHy_d, A_d, d_d);
72
+
73
+ culcHz << <grid, block >> > (Ex_d, Ey_d, Hz_d, oldHz_d, A_d, d_d);
74
+
75
+ cudaDeviceSynchronize();
76
+
77
+
78
+
79
+ change << <grid, block >> > (Ex_d, Ey_d, Ez_d, oldEx_d, oldEy_d, oldEz_d, Hx_d, Hy_d, Hz_d, oldHx_d, oldHy_d, oldHz_d);
80
+
81
+ cudaDeviceSynchronize();
82
+
83
+
84
+
85
+ culct << <2, 2 >> > (t_d);
86
+
87
+ t[0] += (dt / 2);
88
+
89
+ cudaDeviceSynchronize();
90
+
91
+ output << <grid, block >> > (out_d, Ez_d);
92
+
93
+ cudaDeviceSynchronize();
94
+
95
+
96
+
97
+ }
98
+
99
+ /*---------------------------------------------------------------------------------------------------------------------*/
100
+
101
+
102
+
103
+ /*------------------吸収境界条件部分----------------------------*/
104
+
105
+ __global__ void MurEx(double *Ex, double *oldEx, double *A){
106
+
107
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
108
+
109
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
110
+
111
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
112
+
113
+
114
+
115
+ if (i < nx && j < ny && k < nz) {
116
+
117
+ Ex[(k * ny * nx + j * nx + 0)] =
118
+
119
+ oldEx[(k * ny * nx + j * nx + 1)] + A[3] * (Ex[(k * ny * nx + j * nx + 1)] - oldEx[(k * ny * nx + j * nx + 0)]);
120
+
121
+ Ex[(k * ny * nx + j * nx + nx - 1)] = oldEx[(k * ny * nx + j * nx + nx - 2)] + A[3] * (Ex[(k * ny * nx + j * nx + nx - 2)] - oldEx[(k * ny * nx + j * nx + nx - 1)]);
122
+
123
+ }
124
+
125
+ __syncthreads();
126
+
127
+ }
128
+
129
+
130
+
131
+ __global__ void MurEy( double *Ey, double *oldEy, double *A){
132
+
133
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
134
+
135
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
136
+
137
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
138
+
139
+
140
+
141
+ if (i < nx && j < ny && k < nz) {
142
+
143
+ Ey[(k * ny * nx + 0 * nx + i)] =
144
+
145
+ oldEy[(k * ny * nx + 1 * nx + i)] + A[3] * (Ey[(k * ny * nx + 1 * nx + i)] - oldEy[(k * ny * nx + 0 * nx + i)]);
146
+
147
+ Ey[(k * ny * nx + (ny - 1) * nx + i)] =
148
+
149
+ oldEy[(k * ny * nx + (ny - 2) * nx + i)] + A[3] * (Ey[(k * ny * nx + (ny - 2) * nx + i)] - oldEy[(k * ny * nx + (ny - 1) * nx + i)]);
150
+
151
+ }
152
+
153
+ __syncthreads();
154
+
155
+ }
156
+
157
+
158
+
159
+ __global__ void MurEz( double *Ez, double *oldEz, double *A){
160
+
161
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
162
+
163
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
164
+
165
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
166
+
167
+
168
+
169
+ if (i < nx && j < ny && k < nz) {
170
+
171
+ Ez[(k * ny * nx + j * nx + 0)] =
172
+
173
+ oldEz[(k * ny * nx + j * nx + 1)] + A[3] * (Ez[(k * ny * nx + j * nx + 1)] - oldEz[(k * ny * nx + j * nx + 0)]);
174
+
175
+ Ez[(k * ny * nx + j * nx + nx - 1)] =
176
+
177
+ oldEz[(k * ny * nx + j * nx + nx - 2)] + A[3] * (Ez[(k * ny * nx + j * nx + nx - 2)] - oldEz[(k * ny * nx + j * nx + nx - 1)]);
178
+
179
+
180
+
181
+ Ez[(k * ny * nx + 0 * nx + i)] =
182
+
183
+ oldEz[(k * ny * nx + 1 * nx + i)] + A[3] * (Ez[(k * ny * nx + 1 * nx + i)] - oldEz[(k * ny * nx + 0 * nx + i)]);
184
+
185
+ Ez[(k * ny * nx + (ny - 1) * nx + i)]
186
+
187
+ = oldEz[(k * ny * nx + (ny - 2) * nx + i)] + A[3] * (Ez[(k * ny * nx + (ny - 2) * nx + i)] - oldEz[(k * ny * nx + (ny - 1) * nx + i)]);
188
+
189
+ }
190
+
191
+ __syncthreads();
192
+
193
+ }
194
+
195
+ /*------------------------------------------------------------------------------*/
196
+
197
+ //その他補足(プログラムより抜粋)
198
+
199
+ ♯define nx 41
200
+
201
+ ♯define ny 41
202
+
203
+ ♯define nz 41
204
+
205
+
206
+
207
+ dim3 grid;
208
+
209
+ grid.x = 3;
210
+
211
+ grid.y = 3;
212
+
213
+ grid.z = 24;
214
+
215
+ dim3 block;
216
+
217
+ block.x = 16;
218
+
219
+ block.y = 16;
220
+
221
+ block.z = 2;
222
+
223
+ /*--------------------------電磁界計算用係数設定------------------------*/
224
+
225
+ A[0] = ((1 - (sig*dt) / (2 * ep)) / (1 + (sig*dt) / (2 * ep)));
226
+
227
+ A[1] = (dt / ep) / (1 + (sig*dt / (2 * ep)));
228
+
229
+ A[2] = dt / mu;
230
+
231
+ A[3] = (c*dt - dx) / (c*dt + dx);
232
+
233
+ /*-----------------------------------------------
234
+
235
+ 0:電界計算,1:電界計算,2:磁界計算,3:Mur吸収境界条件
236
+
237
+ -------------------------------------------------*/
238
+
239
+ d[0] = 2.0e-12;
240
+
241
+ d[1] = 1.5e-3;
242
+
243
+ d[2] = 1.5e-3;
244
+
245
+ d[3] = 1.5e-3;
246
+
247
+ /*-----------------------------------------------
248
+
249
+ 0:dt,1:dx,2:dy,3:dz
250
+
251
+ -------------------------------------------------*/
252
+
253
+ /*----------------------------電界計算----------------------*/
254
+
255
+ __global__ void culcEx(double *Ex, double *oldEx, double *oldHy, double *oldHz, double *A, double *d)
256
+
257
+ {
258
+
259
+
260
+
261
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
262
+
263
+ int j = blockIdx.y*blockDim.y + threadIdx.y+1;
264
+
265
+ int k = blockIdx.z*blockDim.z + threadIdx.z+1;
266
+
267
+
268
+
269
+ if ((0 < j) && (0 < k) && (i < nx) && (j < ny) && (k < nz)) {
270
+
271
+ Ex[(k * ny * nx + j * nx + i)] = A[0] * oldEx[(k * ny * nx + j * nx + i)] + A[1] * (((oldHz[(k * ny * nx + j * nx + i)] - oldHz[(k * ny * nx + (j - 1) * nx + i)]) / d[2]) - ((oldHy[(k * ny * nx + j * nx + i)] - oldHy[((k - 1) * ny * nx + j * nx + i)]) / d[3])); //4
272
+
273
+ }
274
+
275
+ __syncthreads();
276
+
277
+
278
+
279
+ }
280
+
281
+
282
+
283
+ __global__ void culcEy(double *Ey, double *oldEy, double *oldHx, double *oldHz, double *A, double *d)
284
+
285
+ {
286
+
287
+ int i = blockIdx.x*blockDim.x + threadIdx.x+1;
288
+
289
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
290
+
291
+ int k = blockIdx.z*blockDim.z + threadIdx.z+1;
292
+
293
+
294
+
295
+ if ((0<i) && (0<k) && (i < nx) && (j < ny) && (k < nz)) {
296
+
297
+ Ey[(k * ny * nx + j * nx + i)] = A[0] * oldEy[(k * ny * nx + j * nx + i)] + A[1] * (((oldHx[(k * ny * nx + j * nx + i)] - oldHx[((k - 1) * ny * nx + j * nx + i)]) / d[3]) - ((oldHz[(k * ny * nx + j * nx + i)] - oldHz[(k * ny * nx + j * nx + (i - 1))]) / d[3])); //4
298
+
299
+
300
+
301
+ }
302
+
303
+ __syncthreads();
304
+
305
+
306
+
307
+ }
308
+
309
+
310
+
311
+ __global__ void culcEz(double *Ez, double *oldEz, double *oldHx, double *oldHy, double *A, double *d)
312
+
313
+ {
314
+
315
+ int i = blockIdx.x*blockDim.x + threadIdx.x+1;
316
+
317
+ int j = blockIdx.y*blockDim.y + threadIdx.y+1;
318
+
319
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
320
+
321
+
322
+
323
+ if ((0<i) && (0<j) && (i < nx) && (j < ny) && (k < nz)) {
324
+
325
+ Ez[(k * ny * nx + j * nx + i)] = A[0] * oldEz[(k * ny * nx + j * nx + i)] + A[1] * (((oldHy[(k * ny * nx + j * nx + i)] - oldHy[(k * ny * nx + j * nx + (i - 1))]) / d[1]) - ((oldHx[(k * ny * nx + j * nx + i)] - oldHx[(k * ny * nx + (j - 1) * nx + i)]) / d[2])); //4
326
+
327
+ }
328
+
329
+ __syncthreads();
330
+
331
+
332
+
333
+ }
334
+
335
+ /*----------------------------磁界計算----------------------*/
336
+
337
+ __global__ void culcHx(double *Ey, double *Ez, double *Hx, double *oldHx, double *A, double *d)
338
+
339
+ {
340
+
341
+
342
+
343
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
344
+
345
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
346
+
347
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
348
+
349
+
350
+
351
+ if ((0<i)&&(i<nx) && (j<ny - 1) && (k<nz - 1)) {
352
+
353
+ Hx[k * ny * nx + j * nx + i] = oldHx[k * ny * nx + j * nx + i] - (A[2] * (((Ez[k * ny * nx + (j + 1) * nx + i] - Ez[k * ny * nx + j * nx + i]) / d[2]) - ((Ey[(k + 1) * ny * nx + j * nx + i] - Ey[k * ny * nx + j * nx + i]) / d[3]))); //5
354
+
355
+ }
356
+
357
+ __syncthreads();
358
+
359
+
360
+
361
+ }
362
+
363
+
364
+
365
+
366
+
367
+ __global__ void culcHy(double *Ex, double *Ez, double *Hy, double *oldHy, double *A, double *d)
368
+
369
+ {
370
+
371
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
372
+
373
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
374
+
375
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
376
+
377
+
378
+
379
+ if ((0<j) && (i<nx - 1) && (j<ny) && (k<nz - 1)) {
380
+
381
+ Hy[k * ny * nx + j * nx + i] = oldHy[k * ny * nx + j * nx + i]- (A[2] * (((Ex[(k + 1) * ny * nx + j * nx + i] - Ex[k * ny * nx + j * nx + i]) / d[3]) - ((Ez[k * ny * nx + j * nx + (i + 1)] - Ez[k * ny * nx + j * nx + i]) / d[1]))); //6
382
+
383
+ }
384
+
385
+ __syncthreads();
386
+
387
+
388
+
389
+ }
390
+
391
+
392
+
393
+
394
+
395
+ __global__ void culcHz(double *Ex, double *Ey, double *Hz, double *oldHz, double *A, double *d)
396
+
397
+ {
398
+
399
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
400
+
401
+ int j = blockIdx.y*blockDim.y + threadIdx.y;
402
+
403
+ int k = blockIdx.z*blockDim.z + threadIdx.z;
404
+
405
+
406
+
407
+ if ((0<j) && (i<nx - 1) && j<(ny - 1) && k<nz) {
408
+
409
+ Hz[k * ny * nx + j * nx + i] = oldHz[k * ny * nx + j * nx + i]- (A[2] * (((Ey[k * ny * nx + j * nx + (i + 1)] - Ey[k * ny * nx + j * nx + i]) / d[1]) - ((Ex[k * ny * nx + (j + 1) * nx + i] - Ex[k * ny * nx + j * nx + i]) / d[2]))); //6
410
+
411
+ }
412
+
413
+ __syncthreads();
414
+
415
+
416
+
417
+ }
22
418
 
23
419
  ```
24
420
 
25
421
 
26
422
 
27
- ### 該当のソースコード
28
-
29
- /*--------------------------------------------メイン計算部分------------------------------------------------------------*/
30
-
31
- while (t[0]<2e-9) {
32
-
33
-
34
-
35
- culcEx << <grid, block >> > (Ex_d, oldEx_d, oldHy_d, oldHz_d, A_d, d_d);
36
-
37
- culcEy << <grid, block >> > (Ey_d, oldEy_d, oldHx_d, oldHz_d, A_d, d_d);
38
-
39
- culcEz << <grid, block >> > (Ez_d, oldEz_d, oldHx_d, oldHy_d, A_d, d_d);
40
-
41
- cudaDeviceSynchronize();
42
-
43
-
44
-
45
-
46
-
47
- vin[0] = 1.0*sin(2.0e10*M_PI*t[0]);
48
-
49
- cudaMemcpy(vin_d, vin, sizeof(double), cudaMemcpyHostToDevice);
50
-
51
- Init << <grid, block >> > ( Ez_d, t_d, vin_d);
52
-
53
- cudaDeviceSynchronize();
54
-
55
- Init2 << <grid, block >> > (Ex_d, Ey_d, Ez_d);
56
-
57
- cudaDeviceSynchronize();
58
-
59
- MurEx << <grid, block >> > (Ex_d, oldEx_d, A_d);
60
-
61
- MurEy << <grid, block >> > (Ey_d, oldEy_d, A_d);
62
-
63
- MurEz << <grid, block >> > (Ez_d, oldEz_d, A_d);
64
-
65
- cudaDeviceSynchronize();
66
-
67
-
68
-
69
-
70
-
71
- culct << <2,2 >> > (t_d);
72
-
73
- t[0] += (dt / 2);
74
-
75
- cudaDeviceSynchronize();
76
-
77
-
78
-
79
- culcHx << <grid, block >> > (Ey_d, Ez_d, Hx_d, oldHx_d, A_d, d_d);
80
-
81
- culcHy << <grid, block >> > (Ex_d, Ez_d, Hy_d, oldHy_d, A_d, d_d);
82
-
83
- culcHz << <grid, block >> > (Ex_d, Ey_d, Hz_d, oldHz_d, A_d, d_d);
84
-
85
- cudaDeviceSynchronize();
86
-
87
-
88
-
89
- change << <grid, block >> > (Ex_d, Ey_d, Ez_d, oldEx_d, oldEy_d, oldEz_d, Hx_d, Hy_d, Hz_d, oldHx_d, oldHy_d, oldHz_d);
90
-
91
- cudaDeviceSynchronize();
92
-
93
-
94
-
95
- culct << <2, 2 >> > (t_d);
96
-
97
- t[0] += (dt / 2);
98
-
99
- cudaDeviceSynchronize();
100
-
101
- output << <grid, block >> > (out_d, Ez_d);
102
-
103
- cudaDeviceSynchronize();
104
-
105
-
106
-
107
- }
108
-
109
- /*---------------------------------------------------------------------------------------------------------------------*/
110
-
111
-
112
-
113
- /*------------------吸収境界条件部分----------------------------*/
114
-
115
- __global__ void MurEx(double *Ex, double *oldEx, double *A){
116
-
117
- int i = blockIdx.x*blockDim.x + threadIdx.x;
118
-
119
- int j = blockIdx.y*blockDim.y + threadIdx.y;
120
-
121
- int k = blockIdx.z*blockDim.z + threadIdx.z;
122
-
123
-
124
-
125
- if (i < nx && j < ny && k < nz) {
126
-
127
- Ex[(k * ny * nx + j * nx + 0)] =
128
-
129
- oldEx[(k * ny * nx + j * nx + 1)] + A[3] * (Ex[(k * ny * nx + j * nx + 1)] - oldEx[(k * ny * nx + j * nx + 0)]);
130
-
131
- Ex[(k * ny * nx + j * nx + nx - 1)] = oldEx[(k * ny * nx + j * nx + nx - 2)] + A[3] * (Ex[(k * ny * nx + j * nx + nx - 2)] - oldEx[(k * ny * nx + j * nx + nx - 1)]);
132
-
133
- }
134
-
135
- __syncthreads();
136
-
137
- }
138
-
139
-
140
-
141
- __global__ void MurEy( double *Ey, double *oldEy, double *A){
142
-
143
- int i = blockIdx.x*blockDim.x + threadIdx.x;
144
-
145
- int j = blockIdx.y*blockDim.y + threadIdx.y;
146
-
147
- int k = blockIdx.z*blockDim.z + threadIdx.z;
148
-
149
-
150
-
151
- if (i < nx && j < ny && k < nz) {
152
-
153
- Ey[(k * ny * nx + 0 * nx + i)] =
154
-
155
- oldEy[(k * ny * nx + 1 * nx + i)] + A[3] * (Ey[(k * ny * nx + 1 * nx + i)] - oldEy[(k * ny * nx + 0 * nx + i)]);
156
-
157
- Ey[(k * ny * nx + (ny - 1) * nx + i)] =
158
-
159
- oldEy[(k * ny * nx + (ny - 2) * nx + i)] + A[3] * (Ey[(k * ny * nx + (ny - 2) * nx + i)] - oldEy[(k * ny * nx + (ny - 1) * nx + i)]);
160
-
161
- }
162
-
163
- __syncthreads();
164
-
165
- }
166
-
167
-
168
-
169
- __global__ void MurEz( double *Ez, double *oldEz, double *A){
170
-
171
- int i = blockIdx.x*blockDim.x + threadIdx.x;
172
-
173
- int j = blockIdx.y*blockDim.y + threadIdx.y;
174
-
175
- int k = blockIdx.z*blockDim.z + threadIdx.z;
176
-
177
-
178
-
179
- if (i < nx && j < ny && k < nz) {
180
-
181
- Ez[(k * ny * nx + j * nx + 0)] =
182
-
183
- oldEz[(k * ny * nx + j * nx + 1)] + A[3] * (Ez[(k * ny * nx + j * nx + 1)] - oldEz[(k * ny * nx + j * nx + 0)]);
184
-
185
- Ez[(k * ny * nx + j * nx + nx - 1)] =
186
-
187
- oldEz[(k * ny * nx + j * nx + nx - 2)] + A[3] * (Ez[(k * ny * nx + j * nx + nx - 2)] - oldEz[(k * ny * nx + j * nx + nx - 1)]);
188
-
189
-
190
-
191
- Ez[(k * ny * nx + 0 * nx + i)] =
192
-
193
- oldEz[(k * ny * nx + 1 * nx + i)] + A[3] * (Ez[(k * ny * nx + 1 * nx + i)] - oldEz[(k * ny * nx + 0 * nx + i)]);
194
-
195
- Ez[(k * ny * nx + (ny - 1) * nx + i)]
196
-
197
- = oldEz[(k * ny * nx + (ny - 2) * nx + i)] + A[3] * (Ez[(k * ny * nx + (ny - 2) * nx + i)] - oldEz[(k * ny * nx + (ny - 1) * nx + i)]);
198
-
199
- }
200
-
201
- __syncthreads();
202
-
203
- }
204
-
205
- /*------------------------------------------------------------------------------*/
206
-
207
- //その他補足(プログラムより抜粋)
208
-
209
- ♯define nx 41
210
-
211
- ♯define ny 41
212
-
213
- ♯define nz 41
214
-
215
-
216
-
217
- dim3 grid;
218
-
219
- grid.x = 3;
220
-
221
- grid.y = 3;
222
-
223
- grid.z = 24;
224
-
225
- dim3 block;
226
-
227
- block.x = 16;
228
-
229
- block.y = 16;
230
-
231
- block.z = 2;
232
-
233
- /*--------------------------電磁界計算用係数設定------------------------*/
234
-
235
- A[0] = ((1 - (sig*dt) / (2 * ep)) / (1 + (sig*dt) / (2 * ep)));
236
-
237
- A[1] = (dt / ep) / (1 + (sig*dt / (2 * ep)));
238
-
239
- A[2] = dt / mu;
240
-
241
- A[3] = (c*dt - dx) / (c*dt + dx);
242
-
243
- /*-----------------------------------------------
244
-
245
- 0:電界計算,1:電界計算,2:磁界計算,3:Mur吸収境界条件
246
-
247
- -------------------------------------------------*/
248
-
249
- d[0] = 2.0e-12;
250
-
251
- d[1] = 1.5e-3;
252
-
253
- d[2] = 1.5e-3;
254
-
255
- d[3] = 1.5e-3;
256
-
257
- /*-----------------------------------------------
258
-
259
- 0:dt,1:dx,2:dy,3:dz
260
-
261
- -------------------------------------------------*/
262
-
263
- /*----------------------------電界計算----------------------*/
264
-
265
- __global__ void culcEx(double *Ex, double *oldEx, double *oldHy, double *oldHz, double *A, double *d)
266
-
267
- {
268
-
269
-
270
-
271
- int i = blockIdx.x*blockDim.x + threadIdx.x;
272
-
273
- int j = blockIdx.y*blockDim.y + threadIdx.y+1;
274
-
275
- int k = blockIdx.z*blockDim.z + threadIdx.z+1;
276
-
277
-
278
-
279
- if ((0 < j) && (0 < k) && (i < nx) && (j < ny) && (k < nz)) {
280
-
281
- Ex[(k * ny * nx + j * nx + i)] = A[0] * oldEx[(k * ny * nx + j * nx + i)] + A[1] * (((oldHz[(k * ny * nx + j * nx + i)] - oldHz[(k * ny * nx + (j - 1) * nx + i)]) / d[2]) - ((oldHy[(k * ny * nx + j * nx + i)] - oldHy[((k - 1) * ny * nx + j * nx + i)]) / d[3])); //4
282
-
283
- }
284
-
285
- __syncthreads();
286
-
287
-
288
-
289
- }
290
-
291
-
292
-
293
- __global__ void culcEy(double *Ey, double *oldEy, double *oldHx, double *oldHz, double *A, double *d)
294
-
295
- {
296
-
297
- int i = blockIdx.x*blockDim.x + threadIdx.x+1;
298
-
299
- int j = blockIdx.y*blockDim.y + threadIdx.y;
300
-
301
- int k = blockIdx.z*blockDim.z + threadIdx.z+1;
302
-
303
-
304
-
305
- if ((0<i) && (0<k) && (i < nx) && (j < ny) && (k < nz)) {
306
-
307
- Ey[(k * ny * nx + j * nx + i)] = A[0] * oldEy[(k * ny * nx + j * nx + i)] + A[1] * (((oldHx[(k * ny * nx + j * nx + i)] - oldHx[((k - 1) * ny * nx + j * nx + i)]) / d[3]) - ((oldHz[(k * ny * nx + j * nx + i)] - oldHz[(k * ny * nx + j * nx + (i - 1))]) / d[3])); //4
308
-
309
-
310
-
311
- }
312
-
313
- __syncthreads();
314
-
315
-
316
-
317
- }
318
-
319
-
320
-
321
- __global__ void culcEz(double *Ez, double *oldEz, double *oldHx, double *oldHy, double *A, double *d)
322
-
323
- {
324
-
325
- int i = blockIdx.x*blockDim.x + threadIdx.x+1;
326
-
327
- int j = blockIdx.y*blockDim.y + threadIdx.y+1;
328
-
329
- int k = blockIdx.z*blockDim.z + threadIdx.z;
330
-
331
-
332
-
333
- if ((0<i) && (0<j) && (i < nx) && (j < ny) && (k < nz)) {
334
-
335
- Ez[(k * ny * nx + j * nx + i)] = A[0] * oldEz[(k * ny * nx + j * nx + i)] + A[1] * (((oldHy[(k * ny * nx + j * nx + i)] - oldHy[(k * ny * nx + j * nx + (i - 1))]) / d[1]) - ((oldHx[(k * ny * nx + j * nx + i)] - oldHx[(k * ny * nx + (j - 1) * nx + i)]) / d[2])); //4
336
-
337
- }
338
-
339
- __syncthreads();
340
-
341
-
342
-
343
- }
344
-
345
- /*----------------------------磁界計算----------------------*/
346
-
347
- __global__ void culcHx(double *Ey, double *Ez, double *Hx, double *oldHx, double *A, double *d)
348
-
349
- {
350
-
351
-
352
-
353
- int i = blockIdx.x*blockDim.x + threadIdx.x;
354
-
355
- int j = blockIdx.y*blockDim.y + threadIdx.y;
356
-
357
- int k = blockIdx.z*blockDim.z + threadIdx.z;
358
-
359
-
360
-
361
- if ((0<i)&&(i<nx) && (j<ny - 1) && (k<nz - 1)) {
362
-
363
- Hx[k * ny * nx + j * nx + i] = oldHx[k * ny * nx + j * nx + i] - (A[2] * (((Ez[k * ny * nx + (j + 1) * nx + i] - Ez[k * ny * nx + j * nx + i]) / d[2]) - ((Ey[(k + 1) * ny * nx + j * nx + i] - Ey[k * ny * nx + j * nx + i]) / d[3]))); //5
364
-
365
- }
366
-
367
- __syncthreads();
368
-
369
-
370
-
371
- }
372
-
373
-
374
-
375
-
376
-
377
- __global__ void culcHy(double *Ex, double *Ez, double *Hy, double *oldHy, double *A, double *d)
378
-
379
- {
380
-
381
- int i = blockIdx.x*blockDim.x + threadIdx.x;
382
-
383
- int j = blockIdx.y*blockDim.y + threadIdx.y;
384
-
385
- int k = blockIdx.z*blockDim.z + threadIdx.z;
386
-
387
-
388
-
389
- if ((0<j) && (i<nx - 1) && (j<ny) && (k<nz - 1)) {
390
-
391
- Hy[k * ny * nx + j * nx + i] = oldHy[k * ny * nx + j * nx + i]- (A[2] * (((Ex[(k + 1) * ny * nx + j * nx + i] - Ex[k * ny * nx + j * nx + i]) / d[3]) - ((Ez[k * ny * nx + j * nx + (i + 1)] - Ez[k * ny * nx + j * nx + i]) / d[1]))); //6
392
-
393
- }
394
-
395
- __syncthreads();
396
-
397
-
398
-
399
- }
400
-
401
-
402
-
403
-
404
-
405
- __global__ void culcHz(double *Ex, double *Ey, double *Hz, double *oldHz, double *A, double *d)
406
-
407
- {
408
-
409
- int i = blockIdx.x*blockDim.x + threadIdx.x;
410
-
411
- int j = blockIdx.y*blockDim.y + threadIdx.y;
412
-
413
- int k = blockIdx.z*blockDim.z + threadIdx.z;
414
-
415
-
416
-
417
- if ((0<j) && (i<nx - 1) && j<(ny - 1) && k<nz) {
418
-
419
- Hz[k * ny * nx + j * nx + i] = oldHz[k * ny * nx + j * nx + i]- (A[2] * (((Ey[k * ny * nx + j * nx + (i + 1)] - Ey[k * ny * nx + j * nx + i]) / d[1]) - ((Ex[k * ny * nx + (j + 1) * nx + i] - Ex[k * ny * nx + j * nx + i]) / d[2]))); //6
420
-
421
- }
422
-
423
- __syncthreads();
424
-
425
-
426
-
427
- }
428
-
429
-
430
-
431
-
432
-
433
423
 
434
424
 
435
425
 

1

誤字

2018/10/13 10:21

投稿

run919
run919

スコア10

test CHANGED
File without changes
test CHANGED
@@ -448,11 +448,7 @@
448
448
 
449
449
  i = blockIdx.x*blockDim.x + threadIdx.x+1;のようにしてみたり
450
450
 
451
- メイン関数中のMurE関数がくるところを
452
-
453
-
454
-
455
- ように変更してCPUで計算させてもみましたがダメでした。
451
+ メイン関数中MurE関数がくるところを変更してCPUで計算させてもみましたがダメでした。
456
452
 
457
453
  ### 補足情報(FW/ツールのバージョンなど)
458
454