TDA4VEN-Q1: FFTLIB重复调用输出结果异常

? ??

Part Number: TDA4VEN-Q1
Other Parts Discussed in Thread: FFTLIB

你好

我在开发dsp c7x时，碰到连续调用FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_kernel函数的问题：第一次fft计算结果正常，第二次fft计算结果异常；而单独进行某一次fft(只使用第一次fft或者只使用第二次fft)时结果均正确。第一次fft输入数组为128通道1024个采样点，第二次输入数组为512通道128个采样点，代码如下：

Fullscreen

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * FFT working buffers, placed in the ".l2mem" section and aligned to 64 bytes.
 * func() passes array0/array1 as input/output of the first FFT call and
 * array2/array3 as input/output of the second.
 */
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array0[128][2048];   // 128 x 2048 x 2 bytes = 512 KiB
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array1[128][2048];   // 128 x 2048 x 2 bytes = 512 KiB
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array2[512][256];    // 512 x 256 x 2 bytes = 256 KiB
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array3[512][256];    // 512 x 256 x 2 bytes = 256 KiB
void func()
{
    battch_fft1d_info_type l_battch_fft1d_info = {0};
    
    l_battch_fft1d_info.num_shifts = 5;
    l_battch_fft1d_info.channel = 128;
    l_battch_fft1d_info.num_points = 1024;
    l_battch_fft1d_info.data_type = FFTLIB_INT16;
    bsp_dsppro_battch_fft1d((int16_t *)l2_user_array0, (int16_t *)l2_user_array1, &l_battch_fft1d_info);
   
    l_battch_fft1d_info.num_shifts = 3;
    l_battch_fft1d_info.channel = 512;
    l_battch_fft1d_info.num_points = 128;
    l_battch_fft1d_info.data_type = FFTLIB_INT16;
    bsp_dsppro_battch_fft1d((int16_t *)l2_user_array2, (int16_t *)l2_user_array3, &l_battch_fft1d_info);
}
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

/*
 * FFT working buffers, placed in the ".l2mem" section and aligned to 64 bytes.
 * func() passes array0/array1 as input/output of the first FFT call and
 * array2/array3 as input/output of the second.
 */
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array0[128][2048];   // 128 x 2048 x 2 bytes = 512 KiB
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array1[128][2048];   // 128 x 2048 x 2 bytes = 512 KiB
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array2[512][256];    // 512 x 256 x 2 bytes = 256 KiB
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array3[512][256];    // 512 x 256 x 2 bytes = 256 KiB

void func()
{
    battch_fft1d_info_type l_battch_fft1d_info = {0};
    
    l_battch_fft1d_info.num_shifts = 5;
    l_battch_fft1d_info.channel = 128;
    l_battch_fft1d_info.num_points = 1024;
    l_battch_fft1d_info.data_type = FFTLIB_INT16;
    bsp_dsppro_battch_fft1d((int16_t *)l2_user_array0, (int16_t *)l2_user_array1, &l_battch_fft1d_info);
   
    l_battch_fft1d_info.num_shifts = 3;
    l_battch_fft1d_info.channel = 512;
    l_battch_fft1d_info.num_points = 128;
    l_battch_fft1d_info.data_type = FFTLIB_INT16;
    bsp_dsppro_battch_fft1d((int16_t *)l2_user_array2, (int16_t *)l2_user_array3, &l_battch_fft1d_info);
}

/**
 * Run one batched 1-D int16 FFT through the FFTLIB init + kernel pair.
 *
 * Twiddle factors and the shift vector are allocated from the heap for the
 * duration of the call and released before returning.
 *
 * @param input              Input samples. The length reported to FFTLIB is
 *                           channel * num_points * 2 int16_t elements
 *                           (the factor 2 presumably covers interleaved
 *                           real/imaginary parts -- confirm against FFTLIB docs).
 * @param output             Output buffer, described to FFTLIB with the same length.
 * @param battch_fft1d_info  Transform description. num_shifts, channel and
 *                           num_points are read; data_type is not read here
 *                           (the element type is fixed to FFTLIB_INT16).
 *
 * @return 0 on success;
 *         1 if an argument is NULL, a scratch allocation fails, or the FFTLIB
 *           init call fails;
 *         3 if the FFTLIB kernel call fails.
 */
uint8_t bsp_dsppro_battch_fft1d(int16_t *input, int16_t *output, battch_fft1d_info_type *battch_fft1d_info)
{
    uint8_t  l_u8_ret = 0;
    int16_t  *pX = input;
    int16_t  *pY = output;
    int16_t  *pW = NULL;
    uint32_t *pShift = NULL;
    uint8_t  *pblock = FFTLIB_fft1dbatched_i16sc_c16sc_o16sc_pBlock;
    FFTLIB_bufParams1D_t bufParamsData;
    FFTLIB_bufParams1D_t bufParamsShift;
    FFTLIB_bufParams1D_t bufParamsTw;
    FFTLIB_STATUS status_opt = FFTLIB_SUCCESS;
    uint32_t numShifts;
    uint32_t l_u32_channel;
    uint32_t numPoints;
    uint32_t dataMemSize;

    /* Validate the arguments before dereferencing the descriptor. */
    if ((pX == NULL) || (pY == NULL) || (battch_fft1d_info == NULL))
    {
        DebugP_log("[info]fft input/output/info pointer is NULL!\r\n");
        return 1;
    }

    numShifts     = battch_fft1d_info->num_shifts;   /* entries in the shift vector */
    l_u32_channel = battch_fft1d_info->channel;      /* number of transforms in the batch */
    numPoints     = battch_fft1d_info->num_points;   /* points per transform */
    dataMemSize   = l_u32_channel * numPoints * 2;   /* int16_t elements per data buffer */

    pW = malloc(numPoints * 2 * sizeof *pW);

    /*
     * calloc, not malloc: nothing in this function writes the shift vector, so
     * with malloc the kernel read indeterminate heap contents (possibly data
     * left behind by the buffers freed at the end of a previous call).
     * Zero means "no shift" for every entry.
     * NOTE(review): confirm the per-stage shift values this application
     * needs; the descriptor currently has no field to carry them.
     */
    pShift = calloc(numShifts, sizeof *pShift);

    if ((pW == NULL) || (pShift == NULL))
    {
        DebugP_log("[info]fft scratch buffer allocation failed!\r\n");
        l_u8_ret = 1;
        goto cleanup;
    }

    bufParamsData.dim_x     = dataMemSize;
    bufParamsData.data_type = FFTLIB_INT16;

    bufParamsShift.dim_x     = numShifts;
    bufParamsShift.data_type = FFTLIB_UINT32;

    bufParamsTw.dim_x        = numPoints * 2;
    bufParamsTw.data_type    = FFTLIB_INT16;

    /* Fill the twiddle-factor table for this transform length. */
    tw_gen (pW, numPoints);

    /* Batched FFT: initialisation step. */
    status_opt = FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_init(pX, &bufParamsData, pW, &bufParamsTw,
                                                            pY, &bufParamsData, pShift, &bufParamsShift,
                                                            numPoints, l_u32_channel, pblock);
    if (status_opt != FFTLIB_SUCCESS)
    {
        l_u8_ret = 1;
        goto cleanup;
    }

    /* Batched FFT: execution step. */
    status_opt = FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_kernel(pX, &bufParamsData, pW, &bufParamsTw,
                                                              pY, &bufParamsData, pShift, &bufParamsShift,
                                                              numPoints, l_u32_channel, pblock);
    if (status_opt != FFTLIB_SUCCESS)
    {
        l_u8_ret = 3;
    }

cleanup:
    /* Release scratch memory; free(NULL) is a no-op, so no guards are needed. */
    free(pW);
    free(pShift);

    return l_u8_ret;
}

是否是我遗漏了什么步骤，导致连续调用不能正常工作，若我想连续计算fft，该如何修改代码？希望ti工程师能帮忙指出问题。

1 个月前

0 Eirwen 1 个月前

TI__Genius 14035 points

已经收到了您的案例，调查需要些时间，感谢您的耐心等待。

0 Vivian Gao 1 个月前回复 Eirwen

TI__Mastermind 24450 points

你能告诉我们你正在为C7X DSP开发什么吗？这是一个什么样的项目？

0 Vivian Gao 1 个月前回复 Vivian Gao

TI__Mastermind 24450 points

你能分享一份测试代码和测试输入吗？
您还可以共享链接器脚本来检查内存配置。
此外，由于提供的代码中不存在battch_fft1d_info_type，我创建了一个结构体。你能检查一下是否正确吗？

/*
 * Description of one batched 1-D FFT call.
 *
 * The typedef lets C code name the type as plain `battch_fft1d_info_type`,
 * which is how the code in this thread uses it; `struct battch_fft1d_info_type`
 * remains valid as well.
 */
typedef struct battch_fft1d_info_type{
    uint32_t num_shifts;   /* number of entries in the FFT shift vector */
    uint32_t channel;      /* number of transforms in the batch */
    uint32_t num_points;   /* points per transform */
    int data_type;         /* element type tag; callers set FFTLIB_INT16 */
} battch_fft1d_info_type;

0 ? ?? 1 个月前回复 Vivian Gao

Prodigy 20 points

感谢回复，正在开发毫米波雷达，这部分是做信号处理

0 ? ?? 1 个月前回复 Vivian Gao

Prodigy 20 points

结构体这样写是正确的，我的链接器脚本如下:

Fullscreen

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/* Linker options for the C7x image, followed by the DDR address map macros. */
--ram_model
-heap  0x20000 /* 0x20000 = 128 KiB heap */
-stack 0x20000 /* 0x20000 = 128 KiB stack */
--args 0x1000
--diag_suppress=10068 /* to suppress no matching section error */
--cinit_compression=off
-e _c_int00_secure
#define DDR0_ALLOCATED_START  0xAD000000      /* base address all C7X_* macros below are offset from */
#define C7X_ALLOCATED_START DDR0_ALLOCATED_START
#define C7X_RESOURCE_TABLE_BASE (C7X_ALLOCATED_START + 0x00100000) /* = 0xAD100000 */
#define C7X_IPC_TRACE_BUFFER    (C7X_ALLOCATED_START + 0x00100400) /* = 0xAD100400 */
#define C7X_BOOT_BASE           (C7X_ALLOCATED_START + 0x00200000) /* = 0xAD200000 */
#define C7X_VECTOR_BASE         (C7X_ALLOCATED_START + 0x00400000) /* = 0xAD400000 */
#define C7X_DDR_SPACE_BASE      (C7X_ALLOCATED_START + 0x00600000) /* = 0xAD600000 */
MEMORY
{
    /*L2SRAM_CINIT (RWX)  : org = 0x7E000000, len = 0x000100*/   // for 256byte init     c7x_0 = 7E000000, c7x_1 = 7E200000
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

/* Linker options for the C7x image, followed by the DDR address map macros. */
--ram_model
-heap  0x20000 /* 0x20000 = 128 KiB heap */
-stack 0x20000 /* 0x20000 = 128 KiB stack */
--args 0x1000
--diag_suppress=10068 /* to suppress no matching section error */
--cinit_compression=off
-e _c_int00_secure

#define DDR0_ALLOCATED_START  0xAD000000      /* base address all C7X_* macros below are offset from */

#define C7X_ALLOCATED_START DDR0_ALLOCATED_START

#define C7X_RESOURCE_TABLE_BASE (C7X_ALLOCATED_START + 0x00100000) /* = 0xAD100000 */
#define C7X_IPC_TRACE_BUFFER    (C7X_ALLOCATED_START + 0x00100400) /* = 0xAD100400 */
#define C7X_BOOT_BASE           (C7X_ALLOCATED_START + 0x00200000) /* = 0xAD200000 */
#define C7X_VECTOR_BASE         (C7X_ALLOCATED_START + 0x00400000) /* = 0xAD400000 */
#define C7X_DDR_SPACE_BASE      (C7X_ALLOCATED_START + 0x00600000) /* = 0xAD600000 */

/* Memory regions available to the linker: on-chip L2 SRAM first, then DDR carve-outs. */
MEMORY
{
    /*L2SRAM_CINIT (RWX)  : org = 0x7E000000, len = 0x000100*/   // for 256byte init     c7x_0 = 7E000000, c7x_1 = 7E200000
    /* NOTE(review): org 0x7E000100 + len 0x200000 ends at 0x7E200100, i.e. 0x100 bytes
       past 0x7E200000 (listed above as the c7x_1 base). The trailing "0x1fff00" note
       suggests len was meant to be 0x1FFF00 -- confirm. */
    L2SRAM (RWX)        : org = 0x7E000100, len = 0x200000    // for 2MBytes  EL2  0x1fff00
    L2SRAMAUX   (RWX): org = 0x7F000000, len = 0x040000       // for 256 KBytes J7AEN c7x_0 = 7F000000, c7x_1 = 7F800000

    /* L2SRAM (RWX):  org = 0x7E000000,                len = 0x200000 */
    DDR0_RESERVED: org = 0x80000000,                len = 0x19800000         /*  Reserved for A53 OS (0x19800000 = 408MB) */
    C7X_IPC_D:     org = C7X_ALLOCATED_START,       len = 0x00100000         /*  1MB DDR */
    C7X_BOOT_D:    org = C7X_BOOT_BASE,             len = 0x400              /*  1024B DDR */
    C7X_VECS_D:    org = C7X_VECTOR_BASE,           len = 0x4000             /*  16KB DDR */
    C7X_CIO_MEM:   org = C7X_DDR_SPACE_BASE,        len = 0x1000             /*  4KB */
    /*C7X_DDR_SPACE: org = C7X_DDR_SPACE_BASE+0x1000, len = 0x00BF0000-0x1000*/  /*  11.9MB - 4KB DDR  0x3BE6666-59.9M 0x00BF0000-11.9M*/
    /* General-purpose DDR region: code, data, stack and heap sections are placed here. */
    C7X_DDR_SPACE: org = C7X_DDR_SPACE_BASE+0x1000, len = 0x3BE6666-0x1000  /*  59.9MB - 4KB DDR  0x3BE6666-59.9M 0x00BF0000-11.9M*/
    /* For resource table */
    C7X_RT_D:      org = C7X_RESOURCE_TABLE_BASE, len = 0x400         /*  1024B DDR */
    /* IPC trace buffer */
    LINUX_IPC_TRACE_BUFFER: org = C7X_IPC_TRACE_BUFFER, len = 0xFFC00 /* 1023KB DDR */
    /*LOG_SHM_MEM             : ORIGIN = 0xA7000000, LENGTH = 0x40000*/
    /* Shared memory for RTOS NORTOS IPC */
    RTOS_NORTOS_IPC_SHM_MEM: org = 0xA5000000, len = 0x1C00000  /* 0x1C00000 = 28MB DDR (earlier notes here said 8MB / 32MB) */

    /* xhq test share memmory */
    /*SHARED_DATA_MEM_CX0 (RWX): org = 0xA0100000, len = 0xF00000 */         /* 2MB, all:15MB DDR */
    SHARED_DATA_MEM_CX0 (RWX): org = 0xA7000000, len = 0x06000000           /* 96MB DDR, ends at 0xAD000000 */

    /*SHARED_DATA_MEM_CX0 (RWX): org = 0x880000000, len = 0x06000000*/          /* 2MB, all:15MB DDR */
}

/* Output section placement: which memory region each section is allocated to. */
SECTIONS
{
    /* .text of the boot object goes to the dedicated boot region. */
    boot:
    {
      boot.*<boot.oe71>(.text)
    } load > C7X_BOOT_D
    .vecs       >       C7X_VECS_D
    .secure_vecs    >   C7X_DDR_SPACE ALIGN(0x100000)
    .text:_c_int00_secure > C7X_DDR_SPACE ALIGN(0x200000)
    .text       >       C7X_DDR_SPACE ALIGN(0x100000)

    /* Uninitialised, not-loaded sections kept in L2 SRAM. */
    .l1dmemory  (NOLOAD)(NOINIT) : {} > L2SRAM
    .l2dmemory  (NOLOAD)(NOINIT) : {} > L2SRAM
    .bss        >       C7X_DDR_SPACE  /* Zero-initialized data */
    RUN_START(__BSS_START)
    RUN_END(__BSS_END)

    .data       >       C7X_DDR_SPACE  /* Initialized data */

    .cinit      >       C7X_DDR_SPACE  /* could be part of const */
    .init_array >       C7X_DDR_SPACE  /* C++ initializations */
    .stack      >       C7X_DDR_SPACE ALIGN(0x2000)
    .args       >       C7X_DDR_SPACE
    .cio        >       C7X_CIO_MEM
    .const      >       C7X_DDR_SPACE
    .switch     >       C7X_DDR_SPACE /* For exception handling. */
    .sysmem     >       C7X_DDR_SPACE /* heap (so heap allocations live in DDR, not L2) */

    /* MMU tables, grouped together in DDR and left uninitialised at load time. */
    /* NOTE(review): ".data.gMmu_tableArray_NS" carries a "g" prefix the sibling names lack -- confirm it matches the symbol's section name. */
    GROUP:              >  C7X_DDR_SPACE
    {
        .data.Mmu_tableArray          : type=NOINIT
        .data.Mmu_tableArraySlot      : type=NOINIT
        .data.Mmu_level1Table         : type=NOINIT
        .data.gMmu_tableArray_NS       : type=NOINIT
        .data.Mmu_tableArraySlot_NS   : type=NOINIT
        .data.Mmu_level1Table_NS      : type=NOINIT
    }

    .benchmark_buffer:     > C7X_DDR_SPACE ALIGN (32)

    /* This is the resource table used by linux to know where the IPC "VRINGs" are located */
    .resource_table: { __RESOURCE_TABLE = .;} > C7X_RT_D
    /* This IPC log can be viewed via ROV in CCS and when linux is enabled, this log can also be viewed via linux debugfs */
    .bss.debug_mem_trace_buf    : {} palign(128)    > LINUX_IPC_TRACE_BUFFER
    /* this is used when Debug log's to shared memory is enabled, else this is not used */
    /*.bss.log_shared_mem  (NOLOAD) : {} > LOG_SHM_MEM*/
    /* this is used only when IPC RPMessage is enabled */
    .bss.ipc_vring_mem   (NOLOAD) : {} > RTOS_NORTOS_IPC_SHM_MEM

    /*.l1mem              (NOLOAD)(NOINIT) : {} > L2SRAM_CINIT*/
    /* User sections: ".l2mem" (used by the l2_user_array* buffers) lands in L2SRAM, ".l3mem" in L2SRAMAUX. */
    .l2mem              (NOLOAD)(NOINIT) : {} > L2SRAM
    .l3mem              (NOLOAD)(NOINIT) : {} > L2SRAMAUX
    
    /* Not-loaded user section placed in the DDR shared-data region. */
    .user_array0     (NOLOAD) : {} > SHARED_DATA_MEM_CX0
    /*.user_array1     (NOLOAD) : {} > SHARED_DATA_MEM_1*/
    /*.user_array2     (NOLOAD) : {} > SHARED_DATA_MEM_2*/
    /*.user_array3     (NOLOAD) : {} > SHARED_DATA_MEM_3*/
    /*.user_array4     (NOLOAD) : {} > SHARED_DATA_MEM_4*/
}