TDA4VM: Caution: correction of a possible error in the DSPLIB source code

chang liu

Part Number: TDA4VM

When I used the DSPLIB source code to speed up matrix operations, I found that DSPF_dp_mat_mul_gemm produced wrong results when computing the matrix expression C = C + A * B. I analyzed the code and corrected it; my version below can be used as a replacement if you need it. Note that the numbers of rows and columns of the matrices must be multiples of 2. I don't know whether my change introduces any problems, but at least my current test results are correct.

Original source code:

Fullscreen

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
void DSPF_dp_mat_mul_gemm(double *x1, double const a, const int r1, const int c1,
    double *x2, const int c2, double *restrict y)
{
    int    i, j, k, xoff1, xoff2;
    double sum0, sum1, sum2, sum3;
    double x00, x01, y00, y01, y10, y11, x10, x11;
    double *ptr_x, *ptr_y, *restrict y1, *restrict y2;
    _nassert(r1 > 0);
    _nassert(c1 > 0);
    _nassert(c2 > 0);
    _nassert((int)x1 % 8 == 0);
    _nassert((int)x2 % 8 == 0);
    _nassert((int)y  % 8 == 0);
    _nassert(c1 % 2 == 0 );
    _nassert(r1 % 2 == 0 );
    _nassert(c2 % 2 == 0 );
    #pragma MUST_ITERATE(1,,)
    for (j = 0; j < c2; j+=2) {
      xoff2 = j * c1;
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

/*
 * DSPF_dp_mat_mul_gemm
 *
 * Accumulates a scaled sum of products of x1 and x2 into y, producing the
 * output in 2x2 tiles (two values of i and two values of j per tile, with
 * the inner k loop unrolled by two).
 *
 * Addressing actually used by this routine:
 *   x1 : value (i, k) is read from    x1[i * c1 + k]   (0 <= i < r1, 0 <= k < c1)
 *   x2 : value (k, j) is read from    x2[j * c1 + k]   (0 <= j < c2)
 *   y  : value (i, j) is updated at   y [j * r1 + i]
 *
 * In other words, each output column j consumes c1 consecutive doubles of
 * x2 and occupies r1 consecutive doubles of y. A caller that stores x2 and
 * y row by row (x2[k * c2 + j], y[i * c2 + j]) will therefore not obtain
 * y = y + a * x1 * x2 from this addressing.
 *
 * Parameters:
 *   x1 - first input array, r1 * c1 doubles
 *   a  - scalar applied to every accumulated sum before it is added to y
 *   r1 - number of i positions (rows of x1)
 *   c1 - length of every inner product
 *   x2 - second input array, c2 * c1 doubles, addressed as shown above
 *   c2 - number of j positions
 *   y  - array that is updated in place (+=), r1 * c2 doubles
 *
 * Assumptions conveyed to the compiler with _nassert (this function has no
 * explicit run-time checks or error returns): r1, c1 and c2 are positive
 * and even, and x1, x2 and y are 8-byte aligned. All three loops advance
 * by two and touch index + 1, so odd sizes would access elements past the
 * intended range.
 */
void DSPF_dp_mat_mul_gemm(double *x1, double const a, const int r1, const int c1,
    double *x2, const int c2, double *restrict y)
{
    int row, col, k;
    double *restrict out0, *restrict out1;

    _nassert(r1 > 0);
    _nassert(c1 > 0);
    _nassert(c2 > 0);
    _nassert((int)x1 % 8 == 0);
    _nassert((int)x2 % 8 == 0);
    _nassert((int)y  % 8 == 0);
    _nassert(c1 % 2 == 0 );
    _nassert(r1 % 2 == 0 );
    _nassert(c2 % 2 == 0 );

    #pragma MUST_ITERATE(1,,)
    for (col = 0; col < c2; col += 2) {
        /* Two runs of c1 values from x2, one per output column of the tile. */
        const double *b0 = &x2[col * c1];
        const double *b1 = b0 + c1;

        /* Two runs of r1 values in y, one per output column of the tile. */
        out0 = &y[(col + 0) * r1];
        out1 = &y[(col + 1) * r1];

        #pragma MUST_ITERATE(1,,)
        for (row = 0; row < r1; row += 2) {
            /* Two consecutive rows of x1. */
            const double *a0 = &x1[row * c1];
            const double *a1 = a0 + c1;

            /* accRC: R selects the x1 row, C selects the x2 run. */
            double acc00 = 0;
            double acc01 = 0;
            double acc10 = 0;
            double acc11 = 0;

            #pragma MUST_ITERATE(1,,)
            for (k = 0; k < c1; k += 2) {
                acc00 += a0[k] * b0[k] + a0[k + 1] * b0[k + 1];
                acc01 += a0[k] * b1[k] + a0[k + 1] * b1[k + 1];
                acc10 += a1[k] * b0[k] + a1[k + 1] * b0[k + 1];
                acc11 += a1[k] * b1[k] + a1[k + 1] * b1[k + 1];
            }

            /* Scale and accumulate the finished 2x2 tile into y. */
            out0[row + 0] += a * acc00;
            out1[row + 0] += a * acc01;
            out0[row + 1] += a * acc10;
            out1[row + 1] += a * acc11;
        }
    }
}

Modified code:

Fullscreen

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
void DSPF_dp_mat_mul_gemm_test(double *x1,double const a,  const int r1,
                               const int c1, double *x2, const int c2,
                               double *restrict y) {
  int i, j, k, xoff1, xoff2;
  double sum0, sum1, sum2, sum3;
  double x00, x01, y00, y01, y10, y11, x10, x11;
  double *ptr_x, *ptr_y, *y1, *y2;
    _nassert(r1 > 0);
    _nassert(c1 > 0);
    _nassert(c2 > 0);
    _nassert((int)x1 % 8 == 0);
    _nassert((int)x2 % 8 == 0);
    _nassert((int)y  % 8 == 0);
    _nassert(c1 % 2 == 0 );
    _nassert(r1 % 2 == 0 );
    _nassert(c2 % 2 == 0 );
 #pragma MUST_ITERATE(1,,)
    for (j = 0; j < c2; j+=2) {
      xoff2 = j;
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

/*
 * DSPF_dp_mat_mul_gemm_test
 *
 * Computes y = y + a * (x1 * x2) with all three matrices addressed row by
 * row, producing the output in 2x2 tiles (two rows and two columns of y
 * per tile, with the inner k loop unrolled by two).
 *
 * Addressing used by this routine:
 *   x1 : element (i, k) is read from    x1[i * c1 + k]   (r1 x c1)
 *   x2 : element (k, j) is read from    x2[k * c2 + j]   (c1 x c2)
 *   y  : element (i, j) is updated at   y [i * c2 + j]   (r1 x c2)
 *
 * Parameters:
 *   x1 - left input matrix, r1 * c1 doubles
 *   a  - scalar applied to every inner product before it is added to y
 *   r1 - number of rows of x1 and of y
 *   c1 - number of columns of x1 and number of rows of x2
 *   x2 - right input matrix, c1 * c2 doubles
 *   c2 - number of columns of x2 and of y
 *   y  - matrix that is updated in place (+=), r1 * c2 doubles
 *
 * Assumptions conveyed to the compiler with _nassert (this function has no
 * explicit run-time checks or error returns): r1, c1 and c2 are positive
 * and even, and x1, x2 and y are 8-byte aligned. All three loops advance
 * by two and touch index + 1, so odd sizes would access elements past the
 * intended range.
 */
void DSPF_dp_mat_mul_gemm_test(double *x1, double const a, const int r1,
                               const int c1, double *x2, const int c2,
                               double *restrict y)
{
    int row, col, k;

    _nassert(r1 > 0);
    _nassert(c1 > 0);
    _nassert(c2 > 0);
    _nassert((int)x1 % 8 == 0);
    _nassert((int)x2 % 8 == 0);
    _nassert((int)y  % 8 == 0);
    _nassert(c1 % 2 == 0 );
    _nassert(r1 % 2 == 0 );
    _nassert(c2 % 2 == 0 );

    #pragma MUST_ITERATE(1,,)
    for (col = 0; col < c2; col += 2) {
        #pragma MUST_ITERATE(1,,)
        for (row = 0; row < r1; row += 2) {
            /* Rows `row` and `row + 1` of x1. */
            const double *a_top = &x1[row * c1];
            const double *a_bot = a_top + c1;

            /* Positions (row, col) and (row + 1, col) of y. */
            double *out_top = &y[col + row * c2];
            double *out_bot = &y[col + c2 + row * c2];

            /* One accumulator per cell of the tile: t/b = top/bottom row,
               l/r = left/right column. */
            double acc_tl = 0;
            double acc_tr = 0;
            double acc_bl = 0;
            double acc_br = 0;

            #pragma MUST_ITERATE(1,,)
            for (k = 0; k < c1; k += 2) {
                /* Rows k and k + 1 of x2, starting at column `col`. */
                const double *b_k  = &x2[col + k * c2];
                const double *b_k1 = b_k + c2;

                acc_tl += a_top[k] * b_k[0] + a_top[k + 1] * b_k1[0];
                acc_tr += a_top[k] * b_k[1] + a_top[k + 1] * b_k1[1];
                acc_bl += a_bot[k] * b_k[0] + a_bot[k + 1] * b_k1[0];
                acc_br += a_bot[k] * b_k[1] + a_bot[k + 1] * b_k1[1];
            }

            /* Scale and accumulate the finished 2x2 tile into y. */
            out_top[0] += a * acc_tl;
            out_bot[0] += a * acc_bl;
            out_top[1] += a * acc_tr;
            out_bot[1] += a * acc_br;
        }
    }
}

3 年多前

0 chang liu 3 年多前

您好，我需要有人帮我传达到英文论坛，希望更多的人能看到这个消息，我不确定我这么改有没有漏洞，但目前来说测试的结果很棒

0 Shine 3 年多前回复 chang liu

TI__Guru**** 357097 points

请看一下下面对DSPF_dp_mat_mul_Gemm函数的参数定义
software-dl.ti.com/.../group___d_s_p_f__dp__mat__mul__gemm.html

0 chang liu 3 年多前回复 Shine

我看了，我确定我的输入参数没有问题，得到的结果是错的

0 Shine 3 年多前回复 chang liu

TI__Guru**** 357097 points

用TI的DSPF_dp_mat_mul_gemm函数没有问题，但是自己修改了源码以后就有问题是么？能否描述一下结果具体是怎么错了？

0 chang liu 3 年多前回复 Shine

我在使用TI的DSPF_dp_mat_mul_gemm做矩阵运算C=C+A*B，A、B、C都是6x6的矩阵，得到的结果不对，我尝试分析TI的DSPF_dp_mat_mul_gemm的源码，发现存在一些代码的问题，因此我尝试修改了一下它的代码并重新编译，可以得到正确的结果，但我不知道我这么改有没有什么问题。

0 Shine 3 年多前回复 chang liu

TI__Guru**** 357097 points

我把您的问题升级到英文e2e论坛了，请关注下面帖子的回复。
https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1168574/tda4vm-correction-of-possible-errors-in-dsplib-source-code-is-there-any-bug

处理器

处理器论坛

TDA4VM: Be careful，Correction of possible errors in dsplib source code