c代码是这样的:(test.c)
extern int u32_sqrt2(int, int*);
int main()
{
int ival[16] = {1073741824,536870912,268435456,134217728,67108864,33554432,16777216,8388608,4194304,2097152,1048576,524288,262144,131072,65536,32768};
int m = u32_sqrt2(49, ival);
printf("m:%d---------------",m);
return (m);
}
汇编代码是这样的:(lab.asm)
.global u32_sqrt2 u32_sqrt2: SUBAW.D2 B15,0x6,B15 MV.L1X B4,A3 || STW.D2T1 A4,*B15[1] STW.D2T1 A3,*B15[2] ;36 int r = 0; MVK.L1 0,A3 STW.D2T1 A3,*B15[3] ;39 for(i=0;i<16; i++) MV.L2X A3,B4 STW.D2T2 B4,*B15[4] MVK.S2 16,B5 CMPLT.L2 B4,B5,B0 [!B0] BNOP.S1 L4,5 ;41 int x = ival[i]; L1: MV.L2 B4,B5 || LDW.D2T2 *B15[2],B4 LDW.D2T2 *B4[B5],B4 NOP 4 STW.D2T2 B4,*B15[5] ;43 if(x + r <= val) LDW.D2T2 *B15[3],B5 LDW.D2T2 *B15[1],B6 NOP 3 ADD.L2 B5,B4,B4 CMPGT.L2 B4,B6,B0 [ B0] BNOP.S1 L2,5 ;45 val -= x + r; LDW.D2T2 *+B15[5],B6 LDW.D2T2 *+B15[1],B31 NOP 3 ADD.L2 B5,B6,B4 SUB.L2 B31,B4,B4 STW.D2T2 B4,*B15[1] ;46 r = (r >> 1) | x; LDW.D2T2 *B15[3],B4 MV.L2 B6,B5 NOP 3 SHR.S2 B4,0x1,B4 OR.L2 B5,B4,B4 STW.D2T2 B4,*B15[3] ;47 } BNOP.S1 L3,5 ;50 r = r >> 1; L2: MV.L2 B5,B4 SHR.S2 B4,0x1,B4 STW.D2T2 B4,*B15[3] ;39 for(i=0;i<16; i++) L3: LDW.D2T2 *B15[4],B4 ADD.L2 B4,1,B4 STW.D2T2 B4,*B15[4] MVK.S2 16,B5 CMPLT.L2 B4,B5,B0 [ B0] BNOP.S1 L1,5 ;53 return r; L4: LDW.D2T1 *+B15[3],A4 ;55 } ADDK.S2 24,B15 BNOP.S2 B3,5
汇编对应的c语言是这样的:
int u32_sqrt(int val, int* ival)
{
int r = 0;
int i;
for(i=0;i<16; i++)
{
int x = ival[i];
if(x + r <= val)
{
val -= x + r;
r = (r >> 1) | x;
}
else
{
r = r >> 1;
}
}
return r;
}
编译正常,运行时发现调用u32_sqrt2一直不会返回。 请问这段汇编代码有问题吗?