各位专家,大家好!
关于6678中EDMA模块8核并行传输。我采用了如下配置:核0~1使用EDMA CC0,核2~5使用EDMA CC1,核6~7使用EDMA CC2。核0使用Que0;核1使用Que1;核2使用Que0;核3使用Que1;核4使用Que2;核5使用Que3;核6使用Que0;核7使用Que1。
每个核使用与自己核号相同的通道号、传输完成码(TCC)和PaRAM编号。每个通道都采用shadow region方式,并按核号划分区域(即区域号等于核号)。
现在的情况是:8个核同时发起传输时,各核的EDMA传输都可以完成,但是达不到并行的效果,耗时没有得到优化。根据6678 datasheet,EDMA模块共有10个TC模块,所以理论上可以达到8个核并行传输8个通道的数据。但是单核传输大块数据,与8核均分后分块传输数据相比,耗时相差不大(传输64M的数据时,单核耗时为200ms;8核耗时为169ms),没有达到并行传输的目的,虽然数据传输完成并且没有错误。
请问要让8核完全并行地运行EDMA,使8核的耗时降到单核的1/8,需要更改或设置哪些参数?非常感谢!请各位专家指点,希望能够得到各位专家的宝贵建议。
附上主要函数:
#include <ti/csl/csl_chip.h>
#include <ti/csl/csl_edma3.h>
#include <ti/csl/csl_edma3Aux.h>
#include <ti/csl/csl_cacheAux.h>
/* Channel object storage handed to CSL_edma3ChannelOpen() by the functions below. */
CSL_Edma3ChannelObj chObj;
/* Module object storage handed to CSL_edma3Open() by the functions below. */
CSL_Edma3Obj edmaObj;
/* Status out-parameter of the CSL open / get-handle calls; it is written by
 * those calls but never inspected anywhere in this file. */
CSL_Status status;
/* Context argument passed to CSL_edma3Init(). */
CSL_Edma3Context context;
/* Number of the core running this program; defined and set in main.c. */
extern unsigned int coreNum;
/***************************************************************
* void DMA_Init_region()
*
* Per-core EDMA3 setup using shadow regions. Takes no arguments;
* everything is derived from the global coreNum:
*   - channel controller instance: cores 0-1 -> 0, cores 2-5 -> 1,
*     cores 6-7 -> 2
*   - shadow region, DMA channel, completion-interrupt bit and
*     PaRAM set number are all equal to coreNum
* CSL return codes and the global 'status' are not checked.
**************************************************************/
void DMA_Init_region()
{
    int instNum;
    CSL_Edma3ChannelAttr chAttr;
    CSL_Edma3Handle hModule;
    CSL_Edma3ChannelHandle hChannel;
    CSL_Edma3CmdDrae regionAccess;
    CSL_Edma3CmdIntr regionIntr;

    /* Select the channel controller instance used by this core. */
    if (coreNum < 2)
    {
        instNum = 0;
    }
    else if (coreNum < 6)
    {
        instNum = 1;
    }
    else
    {
        instNum = 2;
    }

    /* Module initialization */
    CSL_edma3Init(&context);

    /* Module level open */
    hModule = CSL_edma3Open(&edmaObj, instNum, NULL, &status);

    /* Give shadow region <coreNum> access to DMA channel <coreNum> only
     * (one bit in DRAE, nothing in DRAEH). */
    regionAccess.region = coreNum;
    regionAccess.drae = 0x1 << coreNum;
    regionAccess.draeh = 0x0;
    CSL_edma3HwControl(hModule, CSL_EDMA3_CMD_DMAREGION_ENABLE, &regionAccess);

    /* Enable completion-interrupt bit <coreNum> in the same shadow region. */
    regionIntr.region = coreNum;
    regionIntr.intr = 0x1 << coreNum;
    regionIntr.intrh = 0x0;
    CSL_edma3HwControl(hModule, CSL_EDMA3_CMD_INTR_ENABLE, &regionIntr);

    /* Event queue -> transfer controller mapping. Queue 0 is always mapped
     * to TC 0; after that, the queue this core uses within its controller
     * is mapped to the TC with the same index. */
    CSL_edma3MapEventQueueToTC(hModule, 0, 0);
    if (coreNum < 2)
    {
        CSL_edma3MapEventQueueToTC(hModule, coreNum, coreNum);
    }
    else if (coreNum < 6)
    {
        CSL_edma3MapEventQueueToTC(hModule, coreNum - 2, coreNum - 2);
    }
    else
    {
        CSL_edma3MapEventQueueToTC(hModule, coreNum - 6, coreNum - 6);
    }

    /* Open channel <coreNum> through shadow region <coreNum>. */
    chAttr.regionNum = coreNum;
    chAttr.chaNum = coreNum;
    hChannel = CSL_edma3ChannelOpen(&chObj, instNum, &chAttr, &status);

    /* Map the DMA channel to PaRAM set <coreNum>. */
    CSL_edma3HwChannelSetupParam(hChannel, coreNum);

    /* Enable channel */
    CSL_edma3HwChannelControl(hChannel, CSL_EDMA3_CMD_CHANNEL_ENABLE, NULL);
}
/***************************************************************
* void DMA_transport_ab_region(Uint32 srcBuff, Uint32 dstBuff,
*                              int acnt, int bcnt,
*                              int srcBidx, int dstBidx)
*
* Programs PaRAM set <coreNum> for one AB-synchronized transfer and
* triggers it manually on channel <coreNum> (shadow region <coreNum>).
*
*   srcBuff / dstBuff : source / destination addresses
*   acnt, bcnt        : ACNT and BCNT of the transfer (CCNT is fixed to 1)
*   srcBidx, dstBidx  : source / destination B-dimension indexes
*
* Returns immediately after the trigger; completion is reported through
* TCC <coreNum> (see waitDMAover_region). CSL return codes and the
* global 'status' are not checked.
**************************************************************/
void DMA_transport_ab_region(Uint32 srcBuff , Uint32 dstBuff, int acnt, int bcnt, int srcBidx, int dstBidx)
{
    /* Cores 0-1 use controller 0, cores 2-5 controller 1, the rest controller 2. */
    int instNum = (coreNum < 2) ? 0 : ((coreNum < 6) ? 1 : 2);
    CSL_Edma3ChannelAttr chAttr;
    CSL_Edma3ChannelHandle chan;
    CSL_Edma3ParamHandle prmHandle;
    CSL_Edma3ParamSetup prm;

    /* Channel number and shadow region are both the core number. */
    chAttr.chaNum = coreNum;
    chAttr.regionNum = coreNum;
    chan = CSL_edma3ChannelOpen(&chObj, instNum, &chAttr, &status);
    prmHandle = CSL_edma3GetParamHandle(chan, coreNum, &status);

    /* Addresses, counts and indexes of the transfer. */
    prm.srcAddr = srcBuff;
    prm.dstAddr = dstBuff;
    prm.aCntbCnt = CSL_EDMA3_CNT_MAKE(acnt,bcnt);
    prm.srcDstBidx = CSL_EDMA3_BIDX_MAKE(srcBidx, dstBidx);
    prm.srcDstCidx = CSL_EDMA3_CIDX_MAKE(0,0);
    prm.cCnt = 1;
    /* No linked PaRAM set follows this one. */
    prm.linkBcntrld = CSL_EDMA3_LINKBCNTRLD_MAKE(CSL_EDMA3_LINK_NULL,0);

    /* Options: no chaining, final transfer-completion interrupt enabled with
     * TCC = coreNum, AB-synchronized, incrementing addressing on both sides. */
    prm.option = CSL_EDMA3_OPT_MAKE(CSL_EDMA3_ITCCH_DIS,
                                    CSL_EDMA3_TCCH_DIS,
                                    CSL_EDMA3_ITCINT_DIS,
                                    CSL_EDMA3_TCINT_EN,
                                    coreNum, CSL_EDMA3_TCC_NORMAL,
                                    CSL_EDMA3_FIFOWIDTH_NONE,
                                    CSL_EDMA3_STATIC_DIS,
                                    CSL_EDMA3_SYNC_AB,
                                    CSL_EDMA3_ADDRMODE_INCR,
                                    CSL_EDMA3_ADDRMODE_INCR);

    CSL_edma3ParamSetup(prmHandle, &prm);

    /* Event queue per core: 0,1 | 0,1,2,3 | 0,1 for cores 0-1 | 2-5 | 6-7. */
    switch (coreNum)
    {
    case 0:
    case 2:
    case 6:
        CSL_edma3HwChannelSetupQue(chan, CSL_EDMA3_QUE_0);
        break;
    case 1:
    case 3:
    case 7:
        CSL_edma3HwChannelSetupQue(chan, CSL_EDMA3_QUE_1);
        break;
    case 4:
        CSL_edma3HwChannelSetupQue(chan, CSL_EDMA3_QUE_2);
        break;
    case 5:
        CSL_edma3HwChannelSetupQue(chan, CSL_EDMA3_QUE_3);
        break;
    default:
        /* Any other core number: queue assignment is left untouched. */
        break;
    }

    /* Manually trigger the channel (set its event). */
    CSL_edma3HwChannelControl(chan, CSL_EDMA3_CMD_CHANNEL_SET, NULL);
}
/***************************************************************
* void waitDMAover_region()
*
* Busy-waits until interrupt-pending bit <coreNum> is set in shadow
* region <coreNum> of this core's channel controller, then clears
* the pending bits that were read. Takes no arguments; controller
* instance, region and bit are all derived from the global coreNum.
* There is no timeout: the function spins until the bit is seen.
**************************************************************/
void waitDMAover_region()
{
    CSL_Edma3Handle hModule;
    CSL_Edma3CmdIntr regionIntr;
    int instNum;
    unsigned int query;

    /* Same core -> controller mapping as the other functions in this file. */
    if (coreNum < 2)
    {
        instNum = 0;
    }
    else if (coreNum < 6)
    {
        instNum = 1;
    }
    else
    {
        instNum = 2;
    }

    /* Completion bit this core waits for (TCC == coreNum). */
    query = 0x1u << coreNum;

    hModule = CSL_edma3Open(&edmaObj, instNum, NULL, &status);

    regionIntr.region = coreNum;
    regionIntr.intr = 0;
    regionIntr.intrh = 0;

    /* Poll the pending register until bit <coreNum> is set. */
    do {
        CSL_edma3GetHwStatus(hModule, CSL_EDMA3_QUERY_INTRPEND, &regionIntr);
    } while (!(regionIntr.intr & query));

    /* Clear the pending bits just read (regionIntr still holds them). */
    CSL_edma3HwControl(hModule, CSL_EDMA3_CMD_INTRPEND_CLEAR, &regionIntr);
}
/***************************************************************
* void DMA_Close_region()
*
* Re-opens this core's channel (channel and shadow region are both
* coreNum) and its channel controller to obtain handles, then closes
* the channel followed by the module. Takes no arguments. CSL return
* codes and the global 'status' are not checked.
**************************************************************/
void DMA_Close_region()
{
    /* Cores 0-1 use controller 0, cores 2-5 controller 1, the rest controller 2. */
    int instNum = (coreNum < 2) ? 0 : ((coreNum < 6) ? 1 : 2);
    CSL_Edma3ChannelAttr chAttr;
    CSL_Edma3ChannelHandle chan;
    CSL_Edma3Handle cc;

    chAttr.chaNum = coreNum;
    chAttr.regionNum = coreNum;

    /* Obtain the two handles needed by the close calls. */
    chan = CSL_edma3ChannelOpen(&chObj, instNum, &chAttr, &status);
    cc = CSL_edma3Open(&edmaObj, instNum, NULL, &status);

    /* Channel first, then the EDMA module. */
    CSL_edma3ChannelClose(chan);
    CSL_edma3Close(cc);
}
/*
 * Builds a global address from a core-local one: the low 24 bits of addr are
 * kept, bits 24 and up are replaced by 0x10 + DNUM (the number of the core
 * executing this call, read from the DNUM register).
 *
 * NOTE(review): the upper 8 bits of addr are discarded unconditionally, so
 * this presumably expects a local (below 0x01000000) address - confirm at
 * call sites.
 */
unsigned int convert_coreLocalToGlobalAddr( unsigned int addr)
{
    const unsigned int dnum = CSL_chipReadReg(CSL_CHIP_DNUM);
    const unsigned int offset = addr & 0x00ffffffu;
    const unsigned int base = 0x10000000u | (dnum << 24);

    return base | offset;
}
/*
* main.c
*/
#include <stdint.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ti/csl/csl_ChipAux.h>
#include <ti/csl/csl_semAux.h>
#include "RSP_EDMA.h"
/* Buffer of 2*NRN floats, 8-byte aligned by the pragma below; main() passes
 * its address as the destination of every EDMA transfer. */
#pragma DATA_ALIGN(x_sp, 8);
float x_sp[2*NRN];
unsigned int coreNum; // number of the core this program is running on
/*
 * Per-core entry point. Each core:
 *   1. reads its core number from DNUM (reduced modulo 8),
 *   2. initialises its EDMA channel,
 *   3. runs NAN/8 transfers, one at a time, each moving NRN elements of
 *      2*size_float bytes (source stride 2*size_float*NAN) into x_sp and
 *      waiting for completion before starting the next,
 *   4. acquires hardware semaphore coreNum+1 and spins until none of
 *      semaphores 1..8 is reported free,
 *   5. closes its EDMA channel and module.
 *
 * NOTE(review): NAN, NRN and size_float are not defined in this file -
 * presumably they come from RSP_EDMA.h. <math.h> may also define a macro
 * named NAN; confirm the two do not clash.
 */
void main ()
{
    int i;
    int loopNum;
    Uint32 srcAddr;

    /* Get this core's number. */
    coreNum = CSL_chipReadReg(CSL_CHIP_DNUM) % 8;

    /* Start of this core's share of the source data. */
    srcAddr = 0x84000000 + coreNum*NAN*2*size_float/8;

    DMA_Init_region();

    loopNum = NAN/8;
    i = 0;
    while (i < loopNum)
    {
        /* Transfer in: start one transfer, then block until it completes. */
        DMA_transport_ab_region(srcAddr + 2*size_float*i,
                                convert_coreLocalToGlobalAddr((Uint32)&x_sp),
                                size_float*2,
                                NRN,
                                2*size_float*NAN,
                                2*size_float);
        waitDMAover_region();
        i++;
    }

    /* Each core acquires the semaphore numbered (its core number + 1). */
    CSL_semAcquireDirect(coreNum+1);

    /* Spin while any of semaphores 1..8 is still free. */
    while ((CSL_semIsFree(1)
          | CSL_semIsFree(2)
          | CSL_semIsFree(3)
          | CSL_semIsFree(4)
          | CSL_semIsFree(5)
          | CSL_semIsFree(6)
          | CSL_semIsFree(7)
          | CSL_semIsFree(8)))
    {
        /* busy-wait */
    }

    DMA_Close_region();
}