大家好、
在过去的几天里、我一直在寻找一种在 MSP430FR5969 MCU 上高效实现矩阵乘法的方法、而这种方法浪费了太多的计算功率、同时也保持了低功耗。 通过互联网、我们发现了两个名为 Qmath 和 IQmath 的库、这两个库都有望实现更快的乘法时间和更低的功耗。
遗憾的是、当我们对它们进行测试时、我们无法发现功耗的任何显著变化、也无法发现处理时间。 我希望大家能给我一个手、 因为我们可能缺少库的一些配置。

这是我使用 Qmath 库的代码(由于平均平方误差更小而配置 Q4)。
#include <msp430.h>
#include <stdint.h>
/* Select the global Q value and include the Qmath header file. */
#define GLOBAL_Q 4
#include "QmathLib.h"
#define sz 16
#define lpc 15
int main(void){
WDTCTL = WDTPW | WDTHOLD; // Stop WDT
P1OUT = 0;
P1DIR = 0xFF;
P2OUT = 0;
P2DIR = 0xFF;
P3OUT = 0;
P3DIR = 0xFF;
P4OUT = 0;
P4DIR = 0xFF;
PJOUT = 0;
PJDIR = 0xFFFF;
PM5CTL0 &= ~LOCKLPM5;
float fa[sz][sz] = {
3.0451312, -2.6170964, -0.36237785, 0.67903620, 0.54625326, -1.4386408, 1.1592991, -0.054826848, -0.038200974, -0.095277585, -2.2473435, 3.3342791, -0.30391535, -1.0859760, -0.34301928, 0.41210720,
-2.6170964, 5.0436597, -2.0817051, -0.79343557, 0.15403587, 1.5043120, -2.2707827, 1.2366960, -0.014145052, 0.049386151, 1.7008784, -4.7957544, 3.3069613, 0.57205218, -0.67790008, -0.34301928,
-0.36237785, -2.0817051, 4.9076262, -2.2785275, -0.87535220, 0.58374476, 1.2477649, -2.2913878, 1.2353808, -0.0016905917, 0.38163254, 1.0874399, -4.5344720, 3.4830644, 0.57205218, -1.0859760,
0.67903620, -0.79343557, -2.2785275, 4.6777167, -2.3003640, 0.0097080851, 0.048614282, 1.2007601, -2.3131311, 1.1977108, -0.11479994, 0.63922209, 1.2034756, -4.5344720, 3.3069613, -0.30391535,
0.54625326, 0.15403587, -0.87535220, -2.3003640, 4.9336734, -2.2747848, -0.099792719, 0.039780665, 1.1884731, -2.3556819, 1.0750339, 0.21016976, 0.63922209, 1.0874399, -4.7957544, 3.3342791,
-1.4386408, 1.5043120, 0.58374476, 0.0097080851, -2.2747848, 1.8861172, -0.22406493, 0.013938670, 0.098963775, 1.3059657, -2.6704209, 1.0750339, -0.11479994, 0.38163254, 1.7008784, -2.2473435,
1.1592991, -2.2707827, 1.2477649, 0.048614282, -0.099792719, -0.22406493, 0.57083476, -0.30296537, -0.026575994, 0.022921033, 1.3059657, -2.3556819, 1.1977108, -0.0016905917, 0.049386151, -0.095277585,
-0.054826848, 1.2366960, -2.2913878, 1.2007601, 0.039780665, 0.013938670, -0.30296537, 0.56730592, -0.30373546, -0.026575994, 0.098963775, 1.1884731, -2.3131311, 1.2353808, -0.014145052, -0.038200974,
-0.038200974, -0.014145052, 1.2353808, -2.3131311, 1.1884731, 0.098963775, -0.026575994, -0.30373546, 0.56730592, -0.30296537, 0.013938670, 0.039780665, 1.2007601, -2.2913878, 1.2366960, -0.054826848,
-0.095277585, 0.049386151, -0.0016905917, 1.1977108, -2.3556819, 1.3059657, 0.022921033, -0.026575994, -0.30296537, 0.57083476, -0.22406493, -0.099792719, 0.048614282, 1.2477649, -2.2707827, 1.1592991,
-2.2473435, 1.7008784, 0.38163254, -0.11479994, 1.0750339, -2.6704209, 1.3059657, 0.098963775, 0.013938670, -0.22406493, 1.8861172, -2.2747848, 0.0097080851, 0.58374476, 1.5043120, -1.4386408,
3.3342791, -4.7957544, 1.0874399, 0.63922209, 0.21016976, 1.0750339, -2.3556819, 1.1884731, 0.039780665, -0.099792719, -2.2747848, 4.9336734, -2.3003640, -0.87535220, 0.15403587, 0.54625326,
-0.30391535, 3.3069613, -4.5344720, 1.2034756, 0.63922209, -0.11479994, 1.1977108, -2.3131311, 1.2007601, 0.048614282, 0.0097080851, -2.3003640, 4.6777167, -2.2785275, -0.79343557, 0.67903620,
-1.0859760, 0.57205218, 3.4830644, -4.5344720, 1.0874399, 0.38163254, -0.0016905917, 1.2353808, -2.2913878, 1.2477649, 0.58374476, -0.87535220, -2.2785275, 4.9076262, -2.0817051, -0.36237785,
-0.34301928, -0.67790008, 0.57205218, 3.3069613, -4.7957544, 1.7008784, 0.049386151, -0.014145052, 1.2366960 -2.2707827, 1.5043120, 0.15403587, -0.79343557, -2.0817051, 5.0436597, -2.6170964,
0.41210720, -0.34301928, -1.0859760, -0.30391535, 3.3342791, -2.2473435, -0.095277585, -0.038200974, -0.054826848, 1.1592991, -1.4386408, 0.54625326, 0.67903620, -0.36237785, -2.6170964, 3.0451312};
float fb[sz] = {2.3, 5.1, 1.05, 2.33, 3.56, 2.3, 5.1, 1.05, 2.33, 3.56, 2.3, 5.1, 1.05, 2.33, 3.56, 1.1};
float fout[sz] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};;
_q out[sz];
int n;
int m;
while(1){
// Matrix-Vector multiplication
for(n = lpc; n>=0; n--){
out[n] = _Q(0);
for(m = lpc; m>=0; m--)
out[n] += _Qmpy(_Q(fa[n][m]), _Q(fb[m]));
fout[n] = _QtoF(out[n]);
}
__delay_cycles(8000000);
}
}
下面是使用 通用 C 算术运算符实现的相同代码:
#include <msp430.h>
#include <stdint.h>
#define sz 16
#define lpc 15
int main(void){
WDTCTL = WDTPW | WDTHOLD; // Stop WDT
P1OUT = 0;
P1DIR = 0xFF;
P2OUT = 0;
P2DIR = 0xFF;
P3OUT = 0;
P3DIR = 0xFF;
P4OUT = 0;
P4DIR = 0xFF;
PJOUT = 0;
PJDIR = 0xFFFF;
PM5CTL0 &= ~LOCKLPM5;
float fa[sz][sz] = {
3.0451312, -2.6170964, -0.36237785, 0.67903620, 0.54625326, -1.4386408, 1.1592991, -0.054826848, -0.038200974, -0.095277585, -2.2473435, 3.3342791, -0.30391535, -1.0859760, -0.34301928, 0.41210720,
-2.6170964, 5.0436597, -2.0817051, -0.79343557, 0.15403587, 1.5043120, -2.2707827, 1.2366960, -0.014145052, 0.049386151, 1.7008784, -4.7957544, 3.3069613, 0.57205218, -0.67790008, -0.34301928,
-0.36237785, -2.0817051, 4.9076262, -2.2785275, -0.87535220, 0.58374476, 1.2477649, -2.2913878, 1.2353808, -0.0016905917, 0.38163254, 1.0874399, -4.5344720, 3.4830644, 0.57205218, -1.0859760,
0.67903620, -0.79343557, -2.2785275, 4.6777167, -2.3003640, 0.0097080851, 0.048614282, 1.2007601, -2.3131311, 1.1977108, -0.11479994, 0.63922209, 1.2034756, -4.5344720, 3.3069613, -0.30391535,
0.54625326, 0.15403587, -0.87535220, -2.3003640, 4.9336734, -2.2747848, -0.099792719, 0.039780665, 1.1884731, -2.3556819, 1.0750339, 0.21016976, 0.63922209, 1.0874399, -4.7957544, 3.3342791,
-1.4386408, 1.5043120, 0.58374476, 0.0097080851, -2.2747848, 1.8861172, -0.22406493, 0.013938670, 0.098963775, 1.3059657, -2.6704209, 1.0750339, -0.11479994, 0.38163254, 1.7008784, -2.2473435,
1.1592991, -2.2707827, 1.2477649, 0.048614282, -0.099792719, -0.22406493, 0.57083476, -0.30296537, -0.026575994, 0.022921033, 1.3059657, -2.3556819, 1.1977108, -0.0016905917, 0.049386151, -0.095277585,
-0.054826848, 1.2366960, -2.2913878, 1.2007601, 0.039780665, 0.013938670, -0.30296537, 0.56730592, -0.30373546, -0.026575994, 0.098963775, 1.1884731, -2.3131311, 1.2353808, -0.014145052, -0.038200974,
-0.038200974, -0.014145052, 1.2353808, -2.3131311, 1.1884731, 0.098963775, -0.026575994, -0.30373546, 0.56730592, -0.30296537, 0.013938670, 0.039780665, 1.2007601, -2.2913878, 1.2366960, -0.054826848,
-0.095277585, 0.049386151, -0.0016905917, 1.1977108, -2.3556819, 1.3059657, 0.022921033, -0.026575994, -0.30296537, 0.57083476, -0.22406493, -0.099792719, 0.048614282, 1.2477649, -2.2707827, 1.1592991,
-2.2473435, 1.7008784, 0.38163254, -0.11479994, 1.0750339, -2.6704209, 1.3059657, 0.098963775, 0.013938670, -0.22406493, 1.8861172, -2.2747848, 0.0097080851, 0.58374476, 1.5043120, -1.4386408,
3.3342791, -4.7957544, 1.0874399, 0.63922209, 0.21016976, 1.0750339, -2.3556819, 1.1884731, 0.039780665, -0.099792719, -2.2747848, 4.9336734, -2.3003640, -0.87535220, 0.15403587, 0.54625326,
-0.30391535, 3.3069613, -4.5344720, 1.2034756, 0.63922209, -0.11479994, 1.1977108, -2.3131311, 1.2007601, 0.048614282, 0.0097080851, -2.3003640, 4.6777167, -2.2785275, -0.79343557, 0.67903620,
-1.0859760, 0.57205218, 3.4830644, -4.5344720, 1.0874399, 0.38163254, -0.0016905917, 1.2353808, -2.2913878, 1.2477649, 0.58374476, -0.87535220, -2.2785275, 4.9076262, -2.0817051, -0.36237785,
-0.34301928, -0.67790008, 0.57205218, 3.3069613, -4.7957544, 1.7008784, 0.049386151, -0.014145052, 1.2366960 -2.2707827, 1.5043120, 0.15403587, -0.79343557, -2.0817051, 5.0436597, -2.6170964,
0.41210720, -0.34301928, -1.0859760, -0.30391535, 3.3342791, -2.2473435, -0.095277585, -0.038200974, -0.054826848, 1.1592991, -1.4386408, 0.54625326, 0.67903620, -0.36237785, -2.6170964, 3.0451312};
float fb[sz] = {2.3, 5.1, 1.05, 2.33, 3.56, 2.3, 5.1, 1.05, 2.33, 3.56, 2.3, 5.1, 1.05, 2.33, 3.56, 1.1};
float fout[sz] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
int n;
int m;
while(1){
for(n = lpc; n>=0; n--){
for(m = lpc; m>=0; m--)
fout[n] += fa[n][m]*fb[m];
}
__delay_cycles(8000000);
}
}
此外、我的一些变量看起来会随着这两种实现而溢出。 是否有关于如何避免这种情况的建议?
提前感谢、
Julio。