你好
我在开发 DSP C7x 时遇到一个问题:连续调用 FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_kernel 函数时,第一次 FFT 计算结果正常,第二次 FFT 计算结果异常;而单独进行某一次 FFT(只执行第一次 FFT 或只执行第二次 FFT)时,结果均正确。第一次 FFT 的输入数组为 128 通道、每通道 1024 个采样点,第二次 FFT 的输入数组为 512 通道、每通道 128 个采样点,代码如下:
__attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array0[128][2048]; // 512k __attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array1[128][2048]; // 512k __attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array2[512][256]; // 256k __attribute__((section(".l2mem"), aligned(64))) int16_t l2_user_array3[512][256]; // 256k void func() { battch_fft1d_info_type l_battch_fft1d_info = {0}; l_battch_fft1d_info.num_shifts = 5; l_battch_fft1d_info.channel = 128; l_battch_fft1d_info.num_points = 1024; l_battch_fft1d_info.data_type = FFTLIB_INT16; bsp_dsppro_battch_fft1d((int16_t *)l2_user_array0, (int16_t *)l2_user_array1, &l_battch_fft1d_info); l_battch_fft1d_info.num_shifts = 3; l_battch_fft1d_info.channel = 512; l_battch_fft1d_info.num_points = 128; l_battch_fft1d_info.data_type = FFTLIB_INT16; bsp_dsppro_battch_fft1d((int16_t *)l2_user_array2, (int16_t *)l2_user_array3, &l_battch_fft1d_info); } uint8_t bsp_dsppro_battch_fft1d(int16_t *input, int16_t *output, battch_fft1d_info_type *battch_fft1d_info) { uint8_t l_u8_ret = 0; int16_t *pX; int16_t *pY; int16_t *pW; uint32_t *pShift; FFTLIB_bufParams1D_t bufParamsData; FFTLIB_bufParams1D_t bufParamsShift; FFTLIB_bufParams1D_t bufParamsTw; FFTLIB_STATUS status_opt = FFTLIB_SUCCESS; uint32_t numShifts = battch_fft1d_info->num_shifts; // 5:1024, 3:128 uint32_t l_u32_channel = battch_fft1d_info->channel; // 128 chirp, 1024 point uint32_t numPoints = battch_fft1d_info->num_points; // 1024 point, 128 chirp uint32_t dataMemSize = l_u32_channel * numPoints * 2; /* Kernel requires input/output */ /* buffers to be atleast * 128 elements long */ uint8_t *pblock = NULL; pblock = FFTLIB_fft1dbatched_i16sc_c16sc_o16sc_pBlock; pX = (int16_t *)input; pY = (int16_t *)output; pW = malloc(numPoints * 2 * sizeof (int16_t)); pShift = malloc(numShifts * sizeof (uint32_t)); if ((pX == NULL) || (pY == NULL) || (pW == NULL) || (pShift == NULL)) { DebugP_log("[info]pX is NULL!\r\n"); l_u8_ret = 1; goto error; } 
bufParamsData.dim_x = dataMemSize; bufParamsData.data_type = FFTLIB_INT16; bufParamsShift.dim_x = numShifts; bufParamsShift.data_type = FFTLIB_UINT32; bufParamsTw.dim_x = numPoints * 2; bufParamsTw.data_type = FFTLIB_INT16; tw_gen (pW, numPoints); /* 批量fft变换 */ /* 批量fft初始化 */ status_opt = FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_init((int16_t *) pX, &bufParamsData, (int16_t *) pW, &bufParamsTw, (int16_t *) pY, &bufParamsData, (uint32_t *) pShift, &bufParamsShift, numPoints, l_u32_channel, pblock); if (status_opt != FFTLIB_SUCCESS) { l_u8_ret = 1; goto error; // /* 批量fft参数检查 */ // status_opt = FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_checkParams((int16_t *) pX, &bufParamsData, (int16_t *) pW, &bufParamsTw, // (int16_t *) pY, &bufParamsData, (uint32_t *) pShift, &bufParamsShift, // numPoints, l_u32_channel, pblock); // if (status_opt != FFTLIB_SUCCESS) // { // l_u8_ret = 2; // goto error; // } /* 批量执行fft */ status_opt = FFTLIB_fft1dBatched_i16sc_c16sc_o16sc_kernel((int16_t *) pX, &bufParamsData, (int16_t *) pW, &bufParamsTw, (int16_t *) pY, &bufParamsData, (uint32_t *) pShift, &bufParamsShift, numPoints, l_u32_channel, pblock); if (status_opt != FFTLIB_SUCCESS) { l_u8_ret = 3; goto error; } error: /* 释放内存 */ if (pW != NULL) { free(pW); } if (pShift != NULL) { free(pShift); } return l_u8_ret; }
是否是我遗漏了什么步骤,导致连续调用不能正常工作,若我想连续计算fft,该如何修改代码?希望ti工程师能帮忙指出问题。
你能分享一份测试代码和测试输入数据吗?
您还可以共享链接器脚本来检查内存配置。
此外,由于提供的代码中不存在battch_fft1d_info_type,我创建了一个结构体。你能检查一下是否正确吗
/**
 * Description of one batched 1-D FFT request.
 *
 * The typedef is required in C: the calling code declares variables as
 * `battch_fft1d_info_type x;` (without the `struct` keyword), which a bare
 * struct tag does not provide. The tag is kept so `struct
 * battch_fft1d_info_type` remains valid too.
 */
typedef struct battch_fft1d_info_type {
    uint32_t num_shifts; /* number of entries in the per-stage shift vector */
    uint32_t channel;    /* number of channels (FFTs) in the batch */
    uint32_t num_points; /* FFT length per channel, in complex points */
    int data_type;       /* FFTLIB data type constant, e.g. FFTLIB_INT16 */
} battch_fft1d_info_type;
结构体这样写是正确的,我的链接器脚本如下:
/* TI linker command file for the C7x core: linker options, the DDR and
 * on-chip memory map, and the placement of every output section. */

--ram_model
-heap  0x20000
-stack 0x20000
--args 0x1000
--diag_suppress=10068 /* to suppress no matching section error */
--cinit_compression=off
-e _c_int00_secure

#define DDR0_ALLOCATED_START    0xAD000000

#define C7X_ALLOCATED_START     DDR0_ALLOCATED_START

#define C7X_RESOURCE_TABLE_BASE (C7X_ALLOCATED_START + 0x00100000)
#define C7X_IPC_TRACE_BUFFER    (C7X_ALLOCATED_START + 0x00100400)
#define C7X_BOOT_BASE           (C7X_ALLOCATED_START + 0x00200000)
#define C7X_VECTOR_BASE         (C7X_ALLOCATED_START + 0x00400000)
#define C7X_DDR_SPACE_BASE      (C7X_ALLOCATED_START + 0x00600000)

MEMORY
{
    /* 256 bytes reserved for init; c7x_0 = 7E000000, c7x_1 = 7E200000 */
    /*L2SRAM_CINIT (RWX) : org = 0x7E000000, len = 0x000100*/

    /* 2 MB L2 minus the first 0x100 bytes. The length was 0x200000, which
     * ran 0x100 bytes past the end of the 2 MB window (up to 0x7E200100,
     * beyond the c7x_1 base noted above). 0x1FFF00 ends the region exactly
     * at 0x7E200000. */
    L2SRAM       (RWX) : org = 0x7E000100, len = 0x1FFF00
    /* 256 KB, J7AEN; c7x_0 = 7F000000, c7x_1 = 7F800000 */
    L2SRAMAUX    (RWX) : org = 0x7F000000, len = 0x040000
    /* L2SRAM (RWX): org = 0x7E000000, len = 0x200000 */

    DDR0_RESERVED:  org = 0x80000000,          len = 0x19800000   /* Reserved for A53 OS */
    C7X_IPC_D:      org = C7X_ALLOCATED_START, len = 0x00100000   /* 1MB DDR */
    C7X_BOOT_D:     org = C7X_BOOT_BASE,       len = 0x400        /* 1024B DDR */
    C7X_VECS_D:     org = C7X_VECTOR_BASE,     len = 0x4000       /* 16KB DDR */
    C7X_CIO_MEM:    org = C7X_DDR_SPACE_BASE,  len = 0x1000       /* 4KB */
    /*C7X_DDR_SPACE: org = C7X_DDR_SPACE_BASE+0x1000, len = 0x00BF0000-0x1000*/ /* 11.9MB - 4KB DDR */
    C7X_DDR_SPACE:  org = C7X_DDR_SPACE_BASE+0x1000, len = 0x3BE6666-0x1000 /* 59.9MB - 4KB DDR (0x3BE6666 = 59.9M, 0x00BF0000 = 11.9M) */

    /* For resource table */
    C7X_RT_D:       org = C7X_RESOURCE_TABLE_BASE, len = 0x400    /* 1024B DDR */
    /* IPC trace buffer */
    LINUX_IPC_TRACE_BUFFER: org = C7X_IPC_TRACE_BUFFER, len = 0xFFC00 /* 1023KB DDR */

    /*LOG_SHM_MEM : ORIGIN = 0xA7000000, LENGTH = 0x40000*/

    /* Shared memory for RTOS NORTOS IPC */
    RTOS_NORTOS_IPC_SHM_MEM: org = 0xA5000000, len = 0x1C00000    /* 28MB DDR (0x1C00000) */

    /* Test shared memory */
    /*SHARED_DATA_MEM_CX0 (RWX): org = 0xA0100000, len = 0xF00000 */ /* 2MB, all:15MB DDR */
    SHARED_DATA_MEM_CX0 (RWX): org = 0xA7000000, len = 0x06000000 /* 96MB DDR */
    /*SHARED_DATA_MEM_CX0 (RWX): org = 0x880000000, len = 0x06000000*/ /* 2MB, all:15MB DDR */
}

SECTIONS
{
    boot:
    {
        boot.*<boot.oe71>(.text)
    } load > C7X_BOOT_D

    .vecs                   > C7X_VECS_D
    .secure_vecs            > C7X_DDR_SPACE ALIGN(0x100000)
    .text:_c_int00_secure   > C7X_DDR_SPACE ALIGN(0x200000)
    .text                   > C7X_DDR_SPACE ALIGN(0x100000)

    .l1dmemory (NOLOAD)(NOINIT) : {} > L2SRAM
    .l2dmemory (NOLOAD)(NOINIT) : {} > L2SRAM

    .bss        > C7X_DDR_SPACE  /* Zero-initialized data */
                  RUN_START(__BSS_START)
                  RUN_END(__BSS_END)
    .data       > C7X_DDR_SPACE  /* Initialized data */

    .cinit      > C7X_DDR_SPACE  /* could be part of const */
    .init_array > C7X_DDR_SPACE  /* C++ initializations */
    .stack      > C7X_DDR_SPACE ALIGN(0x2000)
    .args       > C7X_DDR_SPACE
    .cio        > C7X_CIO_MEM
    .const      > C7X_DDR_SPACE
    .switch     > C7X_DDR_SPACE
    /* For exception handling. */
    .sysmem     > C7X_DDR_SPACE  /* heap */

    GROUP: > C7X_DDR_SPACE
    {
        .data.Mmu_tableArray          : type=NOINIT
        .data.Mmu_tableArraySlot      : type=NOINIT
        .data.Mmu_level1Table         : type=NOINIT
        .data.gMmu_tableArray_NS      : type=NOINIT
        .data.Mmu_tableArraySlot_NS   : type=NOINIT
        .data.Mmu_level1Table_NS      : type=NOINIT
    }

    .benchmark_buffer: > C7X_DDR_SPACE ALIGN (32)

    /* This is the resource table used by linux to know where the IPC "VRINGs" are located */
    .resource_table: { __RESOURCE_TABLE = .;} > C7X_RT_D

    /* This IPC log can be viewed via ROV in CCS and when linux is enabled, this log can also be viewed via linux debugfs */
    .bss.debug_mem_trace_buf : {} palign(128) > LINUX_IPC_TRACE_BUFFER

    /* this is used when Debug log's to shared memory is enabled, else this is not used */
    /*.bss.log_shared_mem (NOLOAD) : {} > LOG_SHM_MEM*/

    /* this is used only when IPC RPMessage is enabled */
    .bss.ipc_vring_mem (NOLOAD) : {} > RTOS_NORTOS_IPC_SHM_MEM

    /*.l1mem (NOLOAD)(NOINIT) : {} > L2SRAM_CINIT*/
    .l2mem (NOLOAD)(NOINIT) : {} > L2SRAM
    .l3mem (NOLOAD)(NOINIT) : {} > L2SRAMAUX

    .user_array0 (NOLOAD) : {} > SHARED_DATA_MEM_CX0
    /*.user_array1 (NOLOAD) : {} > SHARED_DATA_MEM_1*/
    /*.user_array2 (NOLOAD) : {} > SHARED_DATA_MEM_2*/
    /*.user_array3 (NOLOAD) : {} > SHARED_DATA_MEM_3*/
    /*.user_array4 (NOLOAD) : {} > SHARED_DATA_MEM_4*/
}