各位大神好,
我最近想用 DSP C6678 实现一个算法的实时处理,使用的是 CCS 8.3、Win10 和 C6678 开发板;由于对 DSP 的性能还不熟悉,暂时只用一个核做处理。
但是发现同样的 .c 程序在 VS 中只需 25 s,而在 DSP 上(Release 模式下)用 TSCL 测得需要 3000 s,大大超出了我的预期(目的是想要减少时间啊)。
代码没有改动,只是代码中申请使用很多动态数组,一维,二维,三维都有用到,算法中也有很多子函数,用到了很多循环。用了系统自带的C6678的.cmd文件。想向大家请教一下是哪里还需要改设置吗,为什么时间会差距这么大? 以下是我的.cmd文件和主函数的代码。十分感谢~~
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include "math.h"
#include "time.h"
#include "c6x.h"
// Image data that has been read in; stored in DDR3.
// NOTE(review): the section name is spelled ".imageData" here; the linker
// command file must use exactly the same spelling for the placement to apply.
#pragma DATA_SECTION(num,".imageData")
#pragma DATA_ALIGN(num, 256) // 256-byte alignment
double num[3];
// Window sizes. main() pads the image by (out_win_size - 1) in each spatial
// dimension and sizes its work buffers from inner_win_size.
#define inner_win_size 11
#define out_win_size 33
// Prototypes of helpers whose definitions are not part of this excerpt.
// Descriptions below are limited to how main() uses them.

// Called by main() with the flat input buffer, the three dimensions and the
// 3-D array to fill. Presumably unpacks fp into array -- TODO confirm.
void readimagedata(double *fp, int H, int W, int Dim, double ***array);
// Called by main() with the padded height/width; main() uses the returned
// pointer as the padded image and later frees it with Free3DActiveArray.
double ***padarray(double ***h, int H, int W, int Dim);
// main() calls this once with i = 0, j = 0 before the row/column sweeps.
void RXD_function(double ***h, double *mean, double **inv, double **RXD, int Dim, int i, int j);
// Not called from main(); presumably used by the sweep helpers -- TODO confirm.
double **sequential_inv(double **cov, double **aug, int num, int row, int col, char a);
// Sweep helpers: main() calls row_cycle first, then col_cycle, sharing the
// same work buffers (aug_plus, aug_subtract, cov, mean, inv_cov_1, mean_1).
void row_cycle(double ***array, double **aug_plus, double **aug_subtract, double **cov, double *mean, double ***inv_cov_1, double **mean_1, int Dim, int W, double ***h, double **RXD);
void col_cycle(double ***array, double **aug_plus, double **aug_subtract, double **cov, double *mean, double ***inv_cov_1, double **mean_1, int H, int W, int Dim, double ***h, double **RXD);
// Not called from main().
void cov_inv(double **m, int H, int Dim, double *mean, double lambda, double **cov);
// Called once by main() with the padded image, a scratch 2-D buffer, and the
// mean / cov buffers that main() reads back afterwards.
void Firstinv(double ***array, int Dim, double **temp, double *mean, double **cov);
// Allocation helpers for arrays indexed as p[i][j][k] / p[i][j]; each Malloc*
// result is released with the matching Free* using the same leading dimensions.
double*** Malloc3DActiveArray(int x, int y, int z);
void Free3DActiveArray(double *** pArr, int x, int y);
double** Malloc2DActiveArray(int x, int y);
void Free2DActiveArray(double ** pArr, int x);
/*
 * Entry point.
 *
 * Reads a binary file whose first three doubles are H, W and Dim, followed by
 * H*W*Dim doubles of image data; runs the RXD pipeline (padding, Firstinv,
 * RXD_function, row_cycle, col_cycle) while timing it with the time-stamp
 * counter; then writes the H x W result, row-major, to "RXD_double_r.dat".
 *
 * Returns 0 on success and 1 if a file could not be opened/read/written or an
 * allocation made directly in this function failed.
 */
int main()
{
    FILE *fid;
    fid = fopen("E:\\ccs_workspace\\fast_rx_s_2\\test_double.dat", "rb");
    if (fid == NULL)
    {
        printf("error on open file!\n");
        return 1; /* nothing to read from; continuing would pass NULL to fread */
    }
    printf("test console!\n");

    /* Header: three doubles holding H, W, Dim. Named `dims` so it no longer
     * shadows the file-scope `num`. */
    double dims[3];
    if (fread(dims, sizeof(double), 3, fid) != 3)
    {
        printf("error on read file header!\n");
        fclose(fid);
        return 1;
    }
    int H = (int)dims[0];
    int W = (int)dims[1];
    int Dim = (int)dims[2];
    if (H <= 0 || W <= 0 || Dim <= 0)
    {
        printf("invalid image size!\n");
        fclose(fid);
        return 1;
    }

    size_t total = (size_t)H * (size_t)W * (size_t)Dim;
    double *pos = malloc(sizeof(double) * total);
    if (pos == NULL)
    {
        printf("out of memory!\n");
        fclose(fid);
        return 1;
    }
    if (fread(pos, sizeof(double), total, fid) != total)
    {
        printf("error on read file data!\n");
        free(pos);
        fclose(fid);
        return 1;
    }
    fclose(fid); /* closed exactly once; the original closed it a second time */
    printf("pos is ok!\n");

    double ***h = Malloc3DActiveArray(H, W, Dim);
    readimagedata(pos, H, W, Dim, h);
    free(pos);

    /* Start of the timed region. TSCL is read before TSCH in separate
     * statements so the order does not depend on argument evaluation order. */
    unsigned long long t1, t2;
    unsigned int tsc_lo, tsc_hi;
    TSCH = 0;
    TSCL = 0;
    tsc_lo = TSCL;
    tsc_hi = TSCH;
    t1 = _itoll(tsc_hi, tsc_lo);
    printf("h is ok!\n");

    /* padarray returns the padded image itself; allocating a buffer first and
     * then overwriting the pointer (as the original did) only leaked it. */
    double ***array = padarray(h, H + out_win_size - 1, W + out_win_size - 1, Dim);

    double *mean = malloc((size_t)Dim * sizeof(double));
    if (mean == NULL)
    {
        printf("out of memory!\n");
        Free3DActiveArray(h, H, W);
        Free3DActiveArray(array, H + out_win_size - 1, W + out_win_size - 1);
        return 1;
    }
    double **a = Malloc2DActiveArray(8 * inner_win_size * inner_win_size, Dim);
    double **cov = Malloc2DActiveArray(Dim, Dim);
    Firstinv(array, Dim, a, mean, cov);

    /* Seed column 0 of the per-column state with the first window's results. */
    double ***inv_cov_1 = Malloc3DActiveArray(Dim, Dim, W);
    double **mean_1 = Malloc2DActiveArray(Dim, W);
    int i, j;
    for (i = 0; i < Dim; i++)
    {
        for (j = 0; j < Dim; j++)
        {
            inv_cov_1[i][j][0] = cov[i][j];
        }
        mean_1[i][0] = mean[i];
    }

    double **RXD = Malloc2DActiveArray(H, W);
    RXD_function(h, mean, cov, RXD, Dim, 0, 0);
    printf("RXD_1 is ok!\n");

    double **aug_plus = Malloc2DActiveArray(4 * inner_win_size + 1, Dim);
    double **aug_subtract = Malloc2DActiveArray(4 * inner_win_size + 1, Dim);
    row_cycle(array, aug_plus, aug_subtract, cov, mean, inv_cov_1, mean_1, Dim, W, h, RXD);
    printf("row_cycle is ok!\n");
    col_cycle(array, aug_plus, aug_subtract, cov, mean, inv_cov_1, mean_1, H, W, Dim, h, RXD);

    /* End of the timed region. */
    tsc_lo = TSCL;
    tsc_hi = TSCH;
    t2 = _itoll(tsc_hi, tsc_lo);
    printf("cycle = %llu \n", t2 - t1); /* operands are unsigned long long */
    printf("all complete!");

    /* Flatten RXD (H rows of W values) row-major and write it out. */
    int status = 0;
    FILE *fid1 = fopen("RXD_double_r.dat", "wb");
    if (fid1 == NULL)
    {
        printf("error on open wfile!\n");
        status = 1;
    }
    else
    {
        size_t out_count = (size_t)H * (size_t)W;
        /* Sized for H*W outputs; the original allocated Dim*Dim elements. */
        double *f = malloc(sizeof(double) * out_count);
        if (f == NULL)
        {
            printf("out of memory!\n");
            status = 1;
        }
        else
        {
            for (i = 0; i < H; i++)
            {
                for (j = 0; j < W; j++)
                {
                    /* Row stride is W (the original used H). */
                    f[(size_t)i * (size_t)W + (size_t)j] = RXD[i][j];
                }
            }
            if (fwrite(f, sizeof(double), out_count, fid1) != out_count)
            {
                printf("error on write wfile!\n");
                status = 1;
            }
            free(f);
        }
        if (fclose(fid1) != 0)
        {
            printf("error on close wfile!\n");
            status = 1;
        }
    }

    Free3DActiveArray(h, H, W);
    Free3DActiveArray(array, H + out_win_size - 1, W + out_win_size - 1);
    Free2DActiveArray(a, 8 * inner_win_size * inner_win_size);
    Free2DActiveArray(cov, Dim);
    Free3DActiveArray(inv_cov_1, Dim, Dim);
    Free2DActiveArray(mean_1, Dim);
    Free2DActiveArray(RXD, H);
    Free2DActiveArray(aug_plus, 4 * inner_win_size + 1);
    Free2DActiveArray(aug_subtract, 4 * inner_win_size + 1);
    free(mean);
    return status;
}
.cmd文件
/* Memory map: each entry gives a region name, its origin (o) and length (l). */
MEMORY
{
LOCAL_L2_SRAM: o = 0x00800000 l = 0x00080000 /* 512kB LOCAL L2/SRAM */
LOCAL_L1P_SRAM: o = 0x00E00000 l = 0x00008000 /* 32kB LOCAL L1P/SRAM */
LOCAL_L1D_SRAM: o = 0x00F00000 l = 0x00008000 /* 32kB LOCAL L1D/SRAM */
SHRAM: o = 0x0C000000 l = 0x00400000 /* 4MB Multicore shared Memory */
EMIF16_CS2: o = 0x70000000 l = 0x04000000 /* 64MB EMIF16 CS2 Data Memory */
EMIF16_CS3: o = 0x74000000 l = 0x04000000 /* 64MB EMIF16 CS3 Data Memory */
EMIF16_CS4: o = 0x78000000 l = 0x04000000 /* 64MB EMIF16 CS4 Data Memory */
EMIF16_CS5: o = 0x7C000000 l = 0x04000000 /* 64MB EMIF16 CS5 Data Memory */
DDR3: o = 0x80000000 l = 0x80000000 /* 2GB CE0 and CE1 external DDR3 SDRAM */
}
/* Section placement: maps each output section to a region from MEMORY. */
SECTIONS
{
.text > SHRAM
/* NOTE(review): .stack (locals) and .sysmem (the malloc heap) are placed in
 * external DDR3. The program allocates all of its working arrays with malloc,
 * so every hot-loop access goes to DDR3. If DDR3 is not configured as
 * cacheable, or the cache sizes are left at their reset values, this alone
 * could plausibly explain a slowdown of this size -- confirm the cache setup,
 * and consider moving .stack to LOCAL_L2_SRAM if it fits. */
.stack > DDR3
.bss > SHRAM
.cio > SHRAM
.const > SHRAM
.data > SHRAM
.switch > SHRAM
.sysmem > DDR3
.far > SHRAM
.args > SHRAM
.ppinfo > SHRAM
.ppdata > SHRAM
/* COFF sections */
.pinit > SHRAM
.cinit > SHRAM
/* EABI sections */
.binit > SHRAM
.init_array > SHRAM
.neardata > SHRAM
.fardata > SHRAM
.rodata > SHRAM
.c6xabi.exidx > SHRAM
.c6xabi.extab > SHRAM
/* Must match the name in `#pragma DATA_SECTION(num,".imageData")` exactly;
 * section names are case-sensitive, so ".imagedata" did not match it. */
.imageData > DDR3
}

