This thread has been locked.
If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.
void DSPF_dp_mat_mul_gemm(double *x1, double const a, const int r1, const int c1, double *x2, const int c2, double *restrict y) { int i, j, k, xoff1, xoff2; double sum0, sum1, sum2, sum3; double x00, x01, y00, y01, y10, y11, x10, x11; double *ptr_x, *ptr_y, *restrict y1, *restrict y2; _nassert(r1 > 0); _nassert(c1 > 0); _nassert(c2 > 0); _nassert((int)x1 % 8 == 0); _nassert((int)x2 % 8 == 0); _nassert((int)y % 8 == 0); _nassert(c1 % 2 == 0 ); _nassert(r1 % 2 == 0 ); _nassert(c2 % 2 == 0 ); #pragma MUST_ITERATE(1,,) for (j = 0; j < c2; j+=2) { xoff2 = j * c1; y1 = &y[(j + 0) * r1]; y2 = &y[(j + 1) * r1]; #pragma MUST_ITERATE(1,,) for (i = 0; i < r1; i+=2) { xoff1 = i * c1; sum0 = 0; sum1 = 0; sum2 = 0; sum3 = 0; ptr_x = &x1[xoff1]; ptr_y = &x2[xoff2]; #pragma MUST_ITERATE(1,,) for (k = 0; k < c1; k+=2,ptr_x+=2,ptr_y+=2) { x00 = ptr_x[0]; x01 = ptr_x[1]; x10 = ptr_x[c1]; x11 = ptr_x[c1 + 1]; y00 = ptr_y[0]; y01 = ptr_y[c1]; y10 = ptr_y[1]; y11 = ptr_y[c1 + 1]; sum0 += x00 * y00 + x01 * y10; sum1 += x00 * y01 + x01 * y11; sum2 += x10 * y00 + x11 * y10; sum3 += x10 * y01 + x11 * y11; } y1[(i + 0)] += a*sum0; y2[(i + 0)] += a*sum1; y1[(i + 1)] += a*sum2; y2[(i + 1)] += a*sum3; } } }
void DSPF_dp_mat_mul_gemm_test(double *x1,double const a, const int r1, const int c1, double *x2, const int c2, double *restrict y) { int i, j, k, xoff1, xoff2; double sum0, sum1, sum2, sum3; double x00, x01, y00, y01, y10, y11, x10, x11; double *ptr_x, *ptr_y, *y1, *y2; _nassert(r1 > 0); _nassert(c1 > 0); _nassert(c2 > 0); _nassert((int)x1 % 8 == 0); _nassert((int)x2 % 8 == 0); _nassert((int)y % 8 == 0); _nassert(c1 % 2 == 0 ); _nassert(r1 % 2 == 0 ); _nassert(c2 % 2 == 0 ); #pragma MUST_ITERATE(1,,) for (j = 0; j < c2; j+=2) { xoff2 = j; #pragma MUST_ITERATE(1,,) for (i = 0; i < r1; i+=2) { xoff1 = i * c1; y1 =&y[j+i*c2]; y2 =&y[j+c2+i*c2]; sum0 = 0; sum1 = 0; sum2 = 0; sum3 = 0; ptr_x = &x1[xoff1]; ptr_y = &x2[xoff2]; #pragma MUST_ITERATE(1,,) for (k = 0; k < c1; k+=2,ptr_x+=2,ptr_y+=c2*2) { x00 = ptr_x[0]; x01 = ptr_x[1]; x10 = ptr_x[c1]; x11 = ptr_x[c1 + 1]; y00 = ptr_y[0]; y01 = ptr_y[c2]; y10 = ptr_y[1]; y11 = ptr_y[c2 + 1]; sum0 += x00 * y00 + x01 * y01; sum1 += x00 * y10 + x01 * y11; sum2 += x10 * y00 + x11 * y01; sum3 += x10 * y10 + x11 * y11; } y1[0] += a*sum0; y2[0] += a*sum2; y1[1] += a*sum1; y2[1] += a*sum3; } } }
请看一下下面对DSPF_dp_mat_mul_Gemm函数的参数定义
software-dl.ti.com/.../group___d_s_p_f__dp__mat__mul__gemm.html