こちらの質問をご覧頂きありがとうございます。
###前提
行列積計算の速度向上を目的に、シュトラッセンアルゴリズムの実装を行っています。
現在のプログラムでは、シュトラッセンの関数(str)に入るたびに2重のポインタと逐次mallocを使うことで二次元配列を実現しています。
###質問
mallocのメモリ確保・開放分の時間を削減するために、このプログラムを、2重ポインタから固定サイズの2次元配列で書き換え、実現することは可能でしょうか。
ソースコードの方に現在の2重ポインタを使用したプログラムを添付します。
ご回答の程よろしくお願いします。
###ソースコード
C
1#include <stdio.h> 2#include <math.h> 3#include <stdlib.h> 4#include <sys/time.h> 5#include <sys/resource.h> 6 7#define BASE 2 8 9void input(double **, double **, int n); 10void output(double **, int n); 11void str(double **, double **, double **, int n); 12void prod(double **, double **, double **, int n); 13 14double e_time(void); 15double t1, t2, t3 = 0, elapsed; 16 17int main(int argc, char *argv[]) 18{ 19 int i, j, n, size; 20 double **A, **B, **C; 21 22 if (argc <= 1) { 23 fprintf(stderr, "error: argument which indicates the problem size is required.\n"); 24 exit(1); 25 } 26 n = atoi(argv[1]); 27 size = n + 1; 28 29 /* Get the memory space for arrays A, B, C. */ 30 A = (double **)malloc(size * sizeof(double *)); 31 B = (double **)malloc(size * sizeof(double *)); 32 C = (double **)malloc(size * sizeof(double *)); 33 for (i = 1; i <= size; i++) { 34 *(A + i) = (double *)malloc(size * sizeof(double)); 35 *(B + i) = (double *)malloc(size * sizeof(double)); 36 *(C + i) = (double *)malloc(size * sizeof(double)); 37 } 38 39 40 input(A, B, n); 41 //reset vectors 42 for(i = 1; i < size; i++) { 43 for(j = 1; j < size; j++) { 44 C[i][j] = 0.0; 45 } 46 } 47#ifdef DEBUG 48 printf("A = \n"); 49 output(A, n); 50 printf("B = \n"); 51 output(B, n); 52#endif 53 54 t1 = e_time(); 55 str(A, B, C, n); 56 prod(A, B, C, n); 57 t2 = e_time(); 58 //elapsed = (t2 -t1) + t3; 59 elapsed = t2 -t1; 60 printf("t1:%f, t2:%f, elapsed:%f\n", t1, t2, elapsed); 61 printf("Performance : %f MFLOPS\n", (2.0*size*size*size)/elapsed/1000000.0); 62 63#ifdef DEBUG 64 printf("C = \n"); 65 output(C, n); 66#endif 67 68 for (i = 1; i <= size; i++) { 69 free(*(A+i)); 70 free(*(B+i)); 71 free(*(C+i)); 72 } 73 free(A); 74 free(B); 75 free(C); 76 77 return 0; 78} 79 80void input(double **A, double **B, int n) 81{ 82 int i, j; 83 84 for (i = 1; i <= n; i++) { 85 for (j = 1; j <= n; j++) { 86 A[i][j] = (double)rand()/RAND_MAX; 87 B[i][j] = (double)rand()/RAND_MAX; 88 } 89 } 90} 91 92void output(double **O, int n) 93{ 94 int i, j; 95 96 for (i = 1; i <= n; i++) { 97 for (j = 1; j <= n; j++) 98 printf("%3.6f ", O[i][j]); 99 printf("\n"); 100 } 101} 102 103void str(double **A, double **B, double **C, int n) 104{ 105 int i, j, mid, size; 106 double **X, **Y, **M1, **M2, **M3, **M4, **M5, **M6, **M7; 107 double t4, t5; 108 109 if (n <= BASE) { 110 t4 = e_time(); 111 prod(A, B, C, n); 112 t5 = e_time(); 113 t3 += (t5 - t4); 114 return; 115 } 116 mid = n / 2; 117 118 /* Get the memory space for arrays X, Y, M1--M7 here. */ 119 size = n + 1; 120 X = (double **)malloc(size * sizeof(double *)); 121 Y = (double **)malloc(size * sizeof(double *)); 122 M1 = (double **)malloc(size * sizeof(double *)); 123 M2 = (double **)malloc(size * sizeof(double *)); 124 M3 = (double **)malloc(size * sizeof(double *)); 125 M4 = (double **)malloc(size * sizeof(double *)); 126 M5 = (double **)malloc(size * sizeof(double *)); 127 M6 = (double **)malloc(size * sizeof(double *)); 128 M7 = (double **)malloc(size * sizeof(double *)); 129 for (i = 1; i <= mid; i++) { 130 *(X+i) = (double *)malloc(size*sizeof(double)); 131 *(Y+i) = (double *)malloc(size*sizeof(double)); 132 *(M1+i) = (double *)malloc(size*sizeof(double)); 133 *(M2+i) = (double *)malloc(size*sizeof(double)); 134 *(M3+i) = (double *)malloc(size*sizeof(double)); 135 *(M4+i) = (double *)malloc(size*sizeof(double)); 136 *(M5+i) = (double *)malloc(size*sizeof(double)); 137 *(M6+i) = (double *)malloc(size*sizeof(double)); 138 *(M7+i) = (double *)malloc(size*sizeof(double)); 139 } 140 141 t4 = e_time(); 142 /* Compute M1 */ 143 for (i = 1; i <= mid; i++) 144 for (j = 1; j <= mid; j++) { 145 X[i][j] = A[i][mid+j] - A[mid+i][mid+j]; 146 Y[i][j] = B[mid+i][j] + B[mid+i][mid+j]; 147 } 148 str(X,Y,M1,mid); 149 150 151 /* Compute M2--M7 */ 152 // M2 153 for (i = 1; i <= mid; i++) { 154 for (j = 1; j <= mid; j++) { 155 X[i][j] = A[i][j] + A[mid+i][mid+j]; 156 Y[i][j] = B[i][j] + B[mid+i][mid+j]; 157 } 158 } 159 str(X,Y,M2,mid); 160 161 // M3 162 for (i = 1; i <= mid; i++) { 163 for (j = 1; j <= mid; j++) { 164 X[i][j] = A[i][j] - A[mid+i][j]; 165 Y[i][j] = B[i][j] + B[i][mid+j]; 166 } 167 } 168 str(X,Y,M3,mid); 169 170 // M4 171 for (i = 1; i <= mid; i++) { 172 for (j = 1; j <= mid; j++) { 173 X[i][j] = A[i][j] + A[i][mid+j]; 174 Y[i][j] = B[mid+i][mid+j]; 175 } 176 } 177 str(X,Y,M4,mid); 178 179 // M5 180 for (i = 1; i <= mid; i++) { 181 for (j = 1; j <= mid; j++) { 182 X[i][j] = A[i][j]; 183 Y[i][j] = B[i][mid+j] - B[mid+i][mid+j]; 184 } 185 } 186 str(X,Y,M5,mid); 187 188 // M6 189 for (i = 1; i <= mid; i++) { 190 for (j = 1; j <= mid; j++) { 191 X[i][j] = A[mid+i][mid+j]; 192 Y[i][j] = B[mid+i][j] - B[i][j]; 193 } 194 } 195 str(X,Y,M6,mid); 196 197 // M7 198 for (i = 1; i <= mid; i++) { 199 for (j = 1; j <= mid; j++) { 200 X[i][j] = A[mid+i][j] + A[mid+i][mid+j]; 201 Y[i][j] = B[i][j]; 202 } 203 } 204 str(X,Y,M7,mid); 205 206 207 for (i = 1; i <= mid; i++) { 208 for (j = 1; j <= mid; j++) { 209 C[i][j] = M1[i][j]+M2[i][j]-M4[i][j]+M6[i][j]; /* C11 */ 210 C[i][mid+j] = M4[i][j]+M5[i][j]; /* C12 */ 211 C[mid+i][j] = M6[i][j]+M7[i][j]; /* C21 */ 212 C[mid+i][mid+j] = M2[i][j]-M3[i][j]+M5[i][j]-M7[i][j]; /* C22 */ 213 } 214 } 215 216 217 218 /* Free the memory space for X,Y,M1--M7 here */ 219 for (i = 1; i <= mid; i++) { 220 free(*(X+i)); free(*(Y+i)); free(*(M1+i)); 221 free(*(M2+i)); free(*(M3+i)); free(*(M4+i)); 222 free(*(M5+i)); free(*(M6+i)); free(*(M7+i)); 223 } 224 t5 = e_time(); 225 t3 += t5 - t4; 226 free(X); free(Y); free(M1); 227 free(M2); free(M3); free(M4); 228 free(M5); free(M6); free(M7); 229 230 return; 231} 232 233/* The conventional O(n^3) matrix multiplication algorithm 234 which is used to compute the product of two 2 by 2 matrices. */ 235void prod(double **A, double **B, double **C, int n) 236{ 237 int i, j, k; 238 239 for (i = 1; i <= n; i++) 240 for (j = 1; j <= n; j++) { 241 C[i][j] = 0; 242 for (k = 1; k <= n; k++) { 243 C[i][j] = C[i][j] + A[i][k] * B[k][j]; 244 245 } 246 } 247} 248 249 250 /*************************************************************/ 251 /* functions related to time measurement */ 252 /*************************************************************/ 253 double e_time(void) { 254 255 static struct timeval now; 256 static struct timezone tz; 257 258 gettimeofday(&now, &tz); 259 return (double)(now.tv_sec + now.tv_usec/1000000.0); 260 }
回答3件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2015/11/11 05:08 編集
2015/11/11 05:47