This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

TDA4VM: C66

Part Number: TDA4VM


What is the physical address of the C66 L2 memory? Is it 0x00800000? Why is there no change in performance when I use memcpy to copy data to this address?

  • What is the physical address of C66?0x00800000

    0x00800000 是 L2 的地址。

    请参考TRM Table 2-6. C66SS0/1 Memory Map

    Why is there no change in performance when I use memcpy to calculate data at this address?

    请详细说明一下。

  • 我直接用memcpy把数据搬运到C66的L2的这个地址,但函数运行时间并没有变化。是不是这个地址不正确,真正的L2的物理地址需要做一定的偏移?

  • 函数运行时间并没有变化,

    有代码示例吗?包括函数运行时间的测量也贴出来看一下,一般来说不需要偏移。

  • if ((coeffsHeight == 3) && (dilationHeight== 1) && (typeid(Tin) == typeid(uint8_t)) && height/strideHeight >= 8)
    {
      // Path taken only when coeffsHeight == 3, dilationHeight == 1, Tin is uint8_t
      // and height/strideHeight >= 8. It copies input, weights and bias into the
      // buffer at gPtrL2RAM with appUdmaCopy2D/appUdmaCopy1D, runs the 3x3 kernel
      // on those copies while reading get_tsc() before and after, then copies the
      // accumulator output back to accPtr.
      printf("Sy-gPtrL2RAM:%p--------------------------------20220303_1\n", gPtrL2RAM); //gPtrL2RAM:0x00800000
      // Running byte offset from gPtrL2RAM; each staged buffer starts where the
      // previous one ended.
      // NOTE(review): no alignment padding is inserted between buffers and the
      // total is never compared against the size of the region - confirm both.
      uint32_t uiMemUsedSize = 0;
    
      //Input
      // Staged input buffer: numInChannels * inChPitch elements of Tin.
      Tin  *pInData = (Tin *)(uiMemUsedSize + (uint32_t)gPtrL2RAM);   
      uiMemUsedSize += (uint32_t)(numInChannels * inChPitch * sizeof(Tin));
    
      // Cache write-back is issued on both the source and the destination range
      // before the DMA transfer (presumably so the DMA engine sees up-to-date
      // memory - confirm against the appMem API).
      appMemCacheWb(pInChannel, numInChannels * inChPitch * sizeof(Tin));
      appMemCacheWb(pInData, numInChannels * inChPitch * sizeof(Tin));
      
      // 2D copy with one row per input channel; width equals both pitches, so the
      // rows are contiguous on the source and destination side.
      app_udma_copy_2d_prms_t prms_2d_pInBuf;
      appUdmaCopy2DPrms_Init(&prms_2d_pInBuf);
      prms_2d_pInBuf.width        = inChPitch * sizeof(Tin);
      prms_2d_pInBuf.height       = numInChannels;
      prms_2d_pInBuf.dest_pitch   = inChPitch * sizeof(Tin);
      prms_2d_pInBuf.src_pitch    = inChPitch * sizeof(Tin);
      // Addresses are passed through Udma_appVirtToPhyFxn rather than used raw
      // (the raw-pointer variant is kept commented out below).
      prms_2d_pInBuf.dest_addr    = Udma_appVirtToPhyFxn((void *)pInData, 1, NULL);  //Udma_appVirtToPhyFxn
      prms_2d_pInBuf.src_addr     = Udma_appVirtToPhyFxn((void *)pInChannel, 1, NULL);  //appMemGetVirt2PhyBufPtr 
      //prms_2d_pInBuf.dest_addr    = (uint64_t)pInData;
      //prms_2d_pInBuf.src_addr     = (uint64_t)pInChannel;  
      // NOTE(review): whatever appUdmaCopy2D returns is discarded here.
      appUdmaCopy2D(NULL, &prms_2d_pInBuf, 1);
    
      // Invalidate the destination range after the transfer (presumably so later
      // CPU reads do not hit stale cache lines - confirm).
      appMemCacheInv(pInData, numInChannels * inChPitch * sizeof(Tin));
    
      //Weight
      // Staged coefficients: numOutChannels * numInChannels filters of
      // coeffsHeight * coeffsWidth elements of Tw, placed right after the input.
      Tw   *pWeightData = (Tw *)(uiMemUsedSize + (uint32_t)gPtrL2RAM);
      uiMemUsedSize += (uint32_t)(numOutChannels * numInChannels * coeffsHeight * coeffsWidth * sizeof(Tw));
    
      appMemCacheWb(pCoeffs, numOutChannels * numInChannels * coeffsHeight * coeffsWidth * sizeof(Tw));
      appMemCacheWb(pWeightData, numOutChannels * numInChannels * coeffsHeight * coeffsWidth * sizeof(Tw));
    
      // 2D copy with one row per (output channel, input channel) filter; rows are
      // contiguous because width equals both pitches.
      app_udma_copy_2d_prms_t prms_2d_pWeightBuf;
      appUdmaCopy2DPrms_Init(&prms_2d_pWeightBuf);
      prms_2d_pWeightBuf.width        = coeffsHeight * coeffsWidth * sizeof(Tw);
      prms_2d_pWeightBuf.height       = numOutChannels * numInChannels;
      prms_2d_pWeightBuf.dest_pitch   = coeffsHeight * coeffsWidth * sizeof(Tw);
      prms_2d_pWeightBuf.src_pitch    = coeffsHeight * coeffsWidth * sizeof(Tw);  
      prms_2d_pWeightBuf.dest_addr    = Udma_appVirtToPhyFxn((void *)pWeightData, 1, NULL); 
      prms_2d_pWeightBuf.src_addr     = Udma_appVirtToPhyFxn((void *)pCoeffs, 1, NULL);  
      //prms_2d_pWeightBuf.dest_addr    = (uint64_t)pWeightData; 
      //prms_2d_pWeightBuf.src_addr     = (uint64_t)pCoeffs;
      appUdmaCopy2D(NULL, &prms_2d_pWeightBuf, 1);   
    
      appMemCacheInv(pWeightData, numOutChannels * numInChannels * coeffsHeight * coeffsWidth * sizeof(Tw));
    
      //Bias
      // Staged bias: numOutChannels elements of Tb, placed right after the weights.
      Tb   *pBiasData = (Tb *)(uiMemUsedSize + (uint32_t)gPtrL2RAM);
      uiMemUsedSize += (uint32_t)(numOutChannels * sizeof(Tb));
    
      appMemCacheWb(pBias, numOutChannels * sizeof(Tb));
      appMemCacheWb(pBiasData, numOutChannels * sizeof(Tb));
    
      // The bias is a single contiguous run, so the 1D copy variant is used.
      app_udma_copy_1d_prms_t prms_1d_pBaisBuf;
      appUdmaCopy1DPrms_Init(&prms_1d_pBaisBuf);
      prms_1d_pBaisBuf.dest_addr    = Udma_appVirtToPhyFxn((void *)pBiasData, 1, NULL); 
      prms_1d_pBaisBuf.src_addr     = Udma_appVirtToPhyFxn((void *)pBias, 1, NULL); 
      //prms_1d_pBaisBuf.dest_addr    = (uint64_t)pBiasData; 
      //prms_1d_pBaisBuf.src_addr     = (uint64_t)pBias;
      prms_1d_pBaisBuf.length       = numOutChannels * sizeof(Tb); 
      appUdmaCopy1D(NULL, &prms_1d_pBaisBuf); 
    
      appMemCacheInv(pBiasData, numOutChannels * sizeof(Tb));       
    
      // Output accumulators (numOutChannels * outChPitch elements of Tacc) follow
      // the bias and are zero-filled by the CPU rather than by DMA.
      Tacc  *pOutData = (Tacc *)(uiMemUsedSize + (uint32_t)gPtrL2RAM);
      memset(pOutData, 0, numOutChannels * outChPitch * sizeof(Tacc));
      uiMemUsedSize += (uint32_t)(numOutChannels * outChPitch * sizeof(Tacc));
    
      // Only the kernel call lies between the two get_tsc() reads: the DMA staging
      // above and the copy-back below are outside the measured interval, so the
      // printed cycle count does not include any data-movement time.
      ullCyclesStart = get_tsc();
    
      TIDL_refConv2dKernel_i8u_c8s_o32s_3x3s1d1(pInData, pWeightData, pBiasData, pOutData , &min, &max, numTotRoi, numGroups, numInChannels,
        numOutChannels, inChPitch, outChPitch, width, height, inImPitch, outImPitch,
        coeffsWidth, coeffsHeight, dilationWidth, dilationHeight, strideWidth, strideHeight, params->enableBias);
      
      ullCyclesEnd = get_tsc();   
      
      // NOTE(review): %llu assumes the cycle counters are unsigned long long;
      // their declarations are not visible here - confirm.
      printf("Sy-Conv cost cycles %llu\n", (ullCyclesEnd - ullCyclesStart));
    
      // Write back the staged output and the final destination before the DMA
      // copies the accumulators from pOutData to accPtr.
      appMemCacheWb(pOutData, numOutChannels * outChPitch * sizeof(Tacc));
      appMemCacheWb(accPtr, numOutChannels * outChPitch * sizeof(Tacc));
    
      // 2D copy-back with one row per output channel; rows are contiguous because
      // width equals both pitches.
      app_udma_copy_2d_prms_t prms_2d_pOutBuf;
      appUdmaCopy2DPrms_Init(&prms_2d_pOutBuf);
      prms_2d_pOutBuf.width        = outChPitch * sizeof(Tacc);
      prms_2d_pOutBuf.height       = numOutChannels;
      prms_2d_pOutBuf.dest_pitch   = outChPitch * sizeof(Tacc);
      prms_2d_pOutBuf.src_pitch    = outChPitch * sizeof(Tacc);
      prms_2d_pOutBuf.dest_addr    = Udma_appVirtToPhyFxn((void *)accPtr, 1, NULL);  //Udma_appVirtToPhyFxn
      prms_2d_pOutBuf.src_addr     = Udma_appVirtToPhyFxn((void *)pOutData, 1, NULL);  //Udma_appPhyToVirtFxn
      //prms_2d_pOutBuf.dest_addr    = (uint64_t)accPtr;
      //prms_2d_pOutBuf.src_addr     = (uint64_t)pOutData;  
      appUdmaCopy2D(NULL, &prms_2d_pOutBuf, 1);
    
      // Invalidate accPtr after the transfer, mirroring the pattern used for the
      // staged buffers above.
      appMemCacheInv(accPtr, numOutChannels * outChPitch * sizeof(Tacc));  
    } 
    

    这段代码是我用UDMA拷贝做的。现在数据搬运的结果是对的,但是测得的cycles基本没有变化。用UDMA搬运数据,性能不是应该会快很多吗?

    是我UDMA拷贝用的不对还是L2没用起来?

    如果地址不经过这个函数(Udma_appVirtToPhyFxn)转换,而是直接使用它本来的地址,就会报错:UDMA : ERROR: TR Response not completed!!

    gPtrL2RAM这个地址就是0x00800000

  • 这个问题建议您到英文论坛咨询看一下,会有相关专家给您提供支持。

  • 不客气,我会暂时将该帖关闭。