Test Condition
MCU: STM32G431RB @170MHz
IDE: IAR V9.40
Optimization: -o3
Benchmark
Time consuming
| | Float32 | Q31 | Q15 |
|---|---|---|---|
| Duration Without Calc Mag (us) | 4912 | 1659 | 1001 |
| Duration Calc Mag (us) | 5948 | 1943 | 1243 |
Result representation
| | Float32 | Q31 | Q15 |
|---|---|---|---|
| Input DC | 64 | 1024 | 128 |
| Output DC | ~128 | ~1024 | ~128 |
| Input AC | 1024 | 1024 | 1024 |
| Output AC | ~1024 | ~512 | ~512 |
Key points
- 定点RFFT的输出Buffer长度必须是输入的2倍 (浮点没有这个要求),这个没有在源代码里说明,只在官方网页上有一行描述:
Official page
If the input buffer is of length N (fftLenReal), the output buffer must have length 2N since it is containing the conjugate part (except for MVE version where N+2 is enough). The input buffer is modified by this function.
For the RIFFT, the source buffer must have length N+2 since the Nyquist frequency value is needed but conjugate part is ignored. It is not using the packing trick of the float version.
- 关于数据定标,如果输入的数据格式是Q15,则FFT的结果已经不是Q15了,已经放大了(同样见Official page表格)。比如2048点的Q15输入,输出变成了Q4。个人认为这种定标没什么意义,还不如直接给出结果跟原始数据的关系,因为这种系数纯粹是计算过程引入的,并没有什么物理意义,而且还随点数的变化而变化。有可能不管点数多少,输入幅值和输出幅值都是确定的关系,那这种定标就更没意义了。按前面实测表格,定点数的输出DC值与输入DC值接近,而输出AC幅值则约为输入幅值的一半;而浮点格式下输出DC值约为输入的2倍,而输出AC幅值与输入AC幅值接近。
- 官方的取模函数是个大坑,它把实部和虚部求平方和后把这个平方和的结果归一回Q14(右移17位),然后再求开方,这样平方和结果小于17位的输入就都被移成0了。对RFFT来说,虚部接近于零,因此小于9位的数据基本都被移为0了。即使较大的数开方后的结果也变得很小了。初步比较,发现计算过程是平方和右移17位,开方后左移6位,或者说是平方和右移5位再开方。迷之操作。
Test Code
/* USER CODE BEGIN Header */
/**
******************************************************************************
* @file : main.c
* @brief : Main program body
******************************************************************************
* @attention
*
* Copyright (c) 2024 STMicroelectronics.
* All rights reserved.
*
* This software is licensed under terms that can be found in the LICENSE file
* in the root directory of this software component.
* If no LICENSE file comes with this software, it is provided AS-IS.
*
******************************************************************************
*/
/* USER CODE END Header */
/* Includes ------------------------------------------------------------------*/
#include "main.h"
#define GENERATE_INPUT_ONLINE
// #define Q31_TEST
// #define USER_MAG_CALC
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
#include <stdbool.h>
#include "arm_math.h"
#ifndef GENERATE_INPUT_ONLINE
#include "iInputData.h"
#endif
#include "arm_const_structs.h"
/* USER CODE END Includes */
/* Private typedef -----------------------------------------------------------*/
/* USER CODE BEGIN PTD */
/* USER CODE END PTD */
/* Private define ------------------------------------------------------------*/
/* USER CODE BEGIN PD */
/* USER CODE END PD */
/* Private macro -------------------------------------------------------------*/
/* USER CODE BEGIN PM */
/* USER CODE END PM */
/* Private variables ---------------------------------------------------------*/
/* USER CODE BEGIN PV */
/* USER CODE END PV */
/* Private function prototypes -----------------------------------------------*/
void SystemClock_Config(void);
static void MX_CORDIC_Init(void);
/* USER CODE BEGIN PFP */
/* Number of real input samples per transform (length of the RFFT). */
#define N 2048
/* Not referenced by any active code in this file. */
#define SAMPLE_FREQUENCY 1000
/*
 * Exactly one buffer set is compiled in:
 *   FLOAT_TEST defined -> float32 buffers,
 *   Q31_TEST defined   -> q31 buffers,
 *   neither            -> q15 buffers.
 */
#ifdef FLOAT_TEST
/* Time-domain input of the float32 transform. */
float32_t fInputData[N];
/* Output of the float32 transform (N + 2 floats). */
float32_t fOutputData[N+2];
/* Magnitude output written by fFFTCalc() when calcMag is set. */
float32_t fMag[N+1];
#elif defined(Q31_TEST)
// arm_rfft_instance_q15 rfftInstance;
#ifdef GENERATE_INPUT_ONLINE
/* Input buffer filled by FFT_GenerateDate(); otherwise iInputData is expected
   to come from iInputData.h. The pragma is the IAR alignment directive and
   applies to the declaration that follows it. */
#pragma data_alignment=16
static q31_t iInputData[N];
#endif
/* Fixed-point RFFT output: sized 2 * N, as required by the CMSIS-DSP note
   quoted in the benchmark notes at the top of this file. */
#pragma data_alignment=16
q31_t iOutputData[N*2];
/* One magnitude per bin 0 .. N/2 (N/2 + 1 entries). */
q31_t iFFT_Mag[(N>>1)+1];
#else
// arm_rfft_instance_q15 rfftInstance;
#ifdef GENERATE_INPUT_ONLINE
/* Input buffer filled by FFT_GenerateDate(); otherwise iInputData is expected
   to come from iInputData.h. */
#pragma data_alignment=16
static q15_t iInputData[N];
#endif
/* Fixed-point RFFT output: sized 2 * N, as required by the CMSIS-DSP note
   quoted in the benchmark notes at the top of this file. */
#pragma data_alignment=16
q15_t iOutputData[N*2];
/* One entry per bin 0 .. N/2. The element type follows the routine that fills
   it: user_cmplx_mag_q15() writes q31_t, arm_cmplx_mag_q15() writes q15_t. */
#ifdef USER_MAG_CALC
q31_t iFFT_Mag[(N>>1)+1];
#else
q15_t iFFT_Mag[(N>>1)+1];
#endif
#endif
/* frequency1 / frequency2 are not referenced by any active code in this file. */
uint16_t frequency1 = 8; // Hz
uint16_t frequency2 = 32; // Hz
/* Amplitudes of the two generated tones; each is placed in the upper
   half-word of the CORDIC argument by FFT_GenerateDate(). */
uint16_t amplitude1 = 1024;
uint16_t amplitude2 = 256;
/* Constant added to every generated sample (DC component). */
q15_t iOffset = 128;
/* Result of the last measurement, written by main() from TimeMeasure(). */
uint32_t DurationUs = 0;
/* Phase step per sample for bin index 1: 2^16 / N, so N samples of bin index
   k advance the 16-bit phase by k * 2^16. */
uint16_t binIndexScale = (1UL << 16) / N;
/* Bin indices of the two generated tones. */
uint16_t bin1Index = 1;
uint16_t bin2Index = 2;
/* When true, iFFTCalc() / fFFTCalc() also run the magnitude step. */
bool calcMag = false;
/* Polled by main(): a true value triggers one measurement, after which main()
   clears it. NOTE(review): nothing in this file sets calcMag or runOnce to
   true - presumably they are changed from the debugger; confirm. */
bool runOnce = false;
/* Length-2048 RFFT instance matching the selected data type. Only declared
   here; NOTE(review): presumably defined by CMSIS-DSP's constant-structure
   tables - confirm it is linked in. */
#ifdef FLOAT_TEST
extern const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len2048;
#elif defined(Q31_TEST)
extern const arm_rfft_instance_q31 arm_rfft_sR_q31_len2048;
#else
extern const arm_rfft_instance_q15 arm_rfft_sR_q15_len2048;
#endif
/* USER CODE END PFP */
/* Private user code ---------------------------------------------------------*/
/* USER CODE BEGIN 0 */
#ifndef FLOAT_TEST
/**
 * @brief  Squared magnitude of interleaved complex Q15 samples.
 *
 * For every complex sample (real, imag) the raw sum real*real + imag*imag is
 * stored in the destination. No shift and no square root are applied, so small
 * inputs keep their full resolution; the caller is responsible for any
 * further scaling or square root.
 *
 * Both build variants (with and without ARM_MATH_DSP) write one output value
 * per complex input sample.
 *
 * @param  pSrc        interleaved input {re0, im0, re1, im1, ...};
 *                     2 * numSamples q15_t values are read.
 * @param  pDst        output; numSamples q31_t values are written.
 * @param  numSamples  number of complex samples to process.
 */
void user_cmplx_mag_q15(q15_t* pSrc, q31_t* pDst, uint16_t numSamples)
{
  uint16_t blkCnt = numSamples;

  while (blkCnt > 0U)
  {
    /* C[0] = A[0] * A[0] + A[1] * A[1] */
#if defined (ARM_MATH_DSP)
    /* Load real and imaginary part as one packed word and let the dual
       multiply-add form both squares and their sum. */
    q31_t in = read_q15x2_ia ((q15_t **) &pSrc);

    *pDst++ = (q31_t) __SMUAD(in, in);
#else
    q15_t real = *pSrc++;
    q15_t imag = *pSrc++;
    q31_t acc0 = ((q31_t) real * real);
    q31_t acc1 = ((q31_t) imag * imag);

    /* The sum is formed in unsigned arithmetic: the only overflowing input
       (both parts equal to -32768) then wraps instead of being undefined. */
    *pDst++ = (q31_t) ((uint32_t) acc0 + (uint32_t) acc1);
#endif
    /* Decrement loop counter */
    blkCnt--;
  }
}
#endif
/**
 * @brief Start the DWT cycle counter used as the benchmark time base.
 *
 * Sets TRCENA in CoreDebug->DEMCR first, then - only if DWT->CTRL reads
 * non-zero - resets CYCCNT and sets CYCCNTENA.
 */
void TimeMeasureInit(void)
{
  CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;

  if (DWT->CTRL == 0U)
  {
    /* DWT is treated as not present: leave the counter untouched. */
    return;
  }

  DWT->CYCCNT = 0;
  /* Enable the cycle counter. */
  DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
}
/**
 * @brief One-time benchmark setup.
 *
 * Configures the CORDIC peripheral for the test-signal generator and then
 * starts the cycle counter through TimeMeasureInit(). The FFT instances are
 * not initialised here; the init call below is commented out.
 */
void Setup(void)
{
/* Cordic setup: sine function, 16-bit input and output data, one write and
   one read per operation. FFT_GenerateDate() relies on this layout when it
   packs amplitude and phase into a single word. */
LL_CORDIC_Config( CORDIC,
LL_CORDIC_FUNCTION_SINE,
LL_CORDIC_PRECISION_5CYCLES,
LL_CORDIC_SCALE_0,
LL_CORDIC_NBWRITE_1,
LL_CORDIC_NBREAD_1,
LL_CORDIC_INSIZE_16BITS,
LL_CORDIC_OUTSIZE_16BITS);
/* setup time measurement */
TimeMeasureInit();
/* Setup fft instance */
// arm_rfft_init_q15(&rfftInstance, N, false, true);
}
/**
 * @brief Fill the active input buffer with a two-tone test signal plus offset.
 *
 * For each of the N samples the CORDIC is run twice, once per tone. The
 * argument word carries the tone amplitude in its upper half-word and the
 * 16-bit phase in its lower half-word; the upper half-word of the result is
 * used as the sample contribution.
 * NOTE(review): with LL_CORDIC_FUNCTION_SINE and 16-bit output the upper
 * half-word is presumably the secondary (cosine) result - confirm against the
 * CORDIC chapter of the reference manual.
 *
 * The sum of iOffset and both tone contributions is stored as-is in
 * iInputData[], or divided by amplitude1 and stored in fInputData[] when
 * FLOAT_TEST is defined.
 */
void FFT_GenerateDate(void)
{
  for (uint32_t i = 0; i < N; i++) {
    /* Phase of each tone. The index is unsigned so the products are evaluated
       in unsigned arithmetic and wrap for large bin indices instead of
       overflowing a signed int; only the low 16 bits are used below. */
    uint32_t theta1 = i * bin1Index * binIndexScale;
    uint32_t theta2 = i * bin2Index * binIndexScale;
    uint32_t arg;
    int16_t d;

    /* Tone 1 plus the DC offset. */
    arg = ((uint32_t)amplitude1 << 16) + (uint16_t)theta1;
    LL_CORDIC_WriteData(CORDIC, arg);
    arg = LL_CORDIC_ReadData(CORDIC);
    d = ((arg >> 16) & 0xFFFF) + iOffset;

    /* Tone 2 is added on top. */
    arg = ((uint32_t)amplitude2 << 16) + (uint16_t)theta2;
    LL_CORDIC_WriteData(CORDIC, arg);
    arg = LL_CORDIC_ReadData(CORDIC);
    d += ((arg >> 16) & 0xFFFF);

#ifdef FLOAT_TEST
    fInputData[i] = (float32_t)d/amplitude1;
#else
    iInputData[i] = d;
#endif
  }
}
#ifndef FLOAT_TEST
/**
 * @brief Run the fixed-point real FFT and, if requested, the magnitude step.
 *
 * Transforms iInputData into iOutputData with the q31 or q15 RFFT (selected by
 * Q31_TEST). When calcMag is set, the magnitudes of bins 0 .. N/2 are written
 * to iFFT_Mag, using user_cmplx_mag_q15() instead of the library routine when
 * USER_MAG_CALC is defined in the q15 build.
 */
void iFFTCalc(void)
{
  /* Bins 0 .. N/2 inclusive. */
  enum { BIN_COUNT = (N / 2) + 1 };

#ifdef Q31_TEST
  arm_rfft_q31(&arm_rfft_sR_q31_len2048, iInputData, iOutputData);
  if (!calcMag) {
    return;
  }
  arm_cmplx_mag_q31(iOutputData, iFFT_Mag, BIN_COUNT);
#else
  arm_rfft_q15(&arm_rfft_sR_q15_len2048, iInputData, iOutputData);
  if (!calcMag) {
    return;
  }
#ifdef USER_MAG_CALC
  user_cmplx_mag_q15(iOutputData, iFFT_Mag, BIN_COUNT);
#else
  arm_cmplx_mag_q15(iOutputData, iFFT_Mag, BIN_COUNT);
#endif
#endif
}
#endif
#ifdef FLOAT_TEST
/**
 * @brief Run the float32 real FFT and, if requested, the magnitude step.
 *
 * Transforms fInputData into fOutputData (forward transform, last argument 0).
 * When calcMag is set, the magnitudes of N/2 complex values are written to
 * fMag[0 .. N/2 - 1].
 *
 * arm_cmplx_mag_f32() takes the number of COMPLEX samples and reads two floats
 * per sample, so the count must be N/2; a count of N would read 2 * N floats
 * from a buffer that holds N + 2.
 *
 * NOTE(review): per the CMSIS-DSP documentation the float RFFT packs the real
 * DC and Nyquist values into the first complex slot, so fMag[0] is presumably
 * not a plain DC magnitude - confirm before interpreting it.
 */
void fFFTCalc(void)
{
  arm_rfft_fast_f32(&arm_rfft_fast_sR_f32_len2048, fInputData, fOutputData, 0);
  if (calcMag) {
    arm_cmplx_mag_f32(fOutputData, fMag, N / 2);
  }
}
#endif
/**
 * @brief  Convert a pair of cycle-counter readings into microseconds.
 *
 * The elapsed cycle count is the unsigned difference stopTick - startTick.
 * Modulo-2^32 arithmetic makes this exact even when the 32-bit counter
 * wrapped once between the two readings, so no special case is needed.
 *
 * The conversion assumes 170 counter ticks per microsecond, i.e. a 170 MHz
 * core clock; verify that the clock configuration really produces that
 * frequency, otherwise the result is scaled incorrectly.
 *
 * @param  startTick  counter value sampled before the measured code.
 * @param  stopTick   counter value sampled after the measured code.
 * @return elapsed time in microseconds, rounded down.
 */
uint32_t TimeMeasure(uint32_t startTick, uint32_t stopTick)
{
  const uint32_t cyclesPerUs = 170U;
  uint32_t deltaTick = stopTick - startTick;

  return (deltaTick / cyclesPerUs);
}
/* USER CODE END 0 */
/**
  * @brief  The application entry point.
  *
  * Initialises HAL, clocks and the CORDIC, then polls runOnce forever. Each
  * time the flag is found set, one transform is timed with the DWT cycle
  * counter, the result is stored in DurationUs and the flag is cleared.
  * @retval int
  */
int main(void)
{
  /* USER CODE BEGIN 1 */
  /* USER CODE END 1 */
  /* MCU Configuration--------------------------------------------------------*/
  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();
  /* USER CODE BEGIN Init */
  /* USER CODE END Init */
  /* Configure the system clock */
  SystemClock_Config();
  /* USER CODE BEGIN SysInit */
  /* USER CODE END SysInit */
  /* Initialize all configured peripherals */
  MX_CORDIC_Init();
  /* USER CODE BEGIN 2 */
  /* Mask the SysTick interrupt (presumably so that the tick handler cannot
     run inside the measured section). */
  SysTick->CTRL &=~SysTick_CTRL_TICKINT_Msk;
  Setup();
  /* USER CODE END 2 */
  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */
    /* USER CODE BEGIN 3 */
    /* The benchmark lives inside this user section so that it sits between a
       USER CODE BEGIN/END pair like the rest of the hand-written code. */

    /* Nothing in this file sets runOnce, so it has to change behind the
       compiler's back (NOTE(review): presumably from the debugger). The flag
       is therefore accessed through a volatile lvalue; a plain read could
       legally be hoisted out of this call-free loop by the optimiser. */
    if (*(volatile bool *)&runOnce) {
#ifdef GENERATE_INPUT_ONLINE
      /* Signal generation is kept outside the timed section. */
      FFT_GenerateDate();
#endif
      uint32_t startTick = DWT->CYCCNT;
#ifdef FLOAT_TEST
      fFFTCalc();
#else
      iFFTCalc();
#endif
      uint32_t stopTick = DWT->CYCCNT;

      DurationUs = TimeMeasure(startTick, stopTick);
      *(volatile bool *)&runOnce = false;
    }
  }
  /* USER CODE END 3 */
}
/**
* @brief System Clock Configuration
* @retval None
*/
void SystemClock_Config(void)
{
RCC_OscInitTypeDef RCC_OscInitStruct = {0};
RCC_ClkInitTypeDef RCC_ClkInitStruct = {0};
/** Configure the main internal regulator output voltage
*/
HAL_PWREx_ControlVoltageScaling(PWR_REGULATOR_VOLTAGE_SCALE1);
/** Initializes the RCC Oscillators according to the specified parameters
* in the RCC_OscInitTypeDef structure.
*/
RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSI;
RCC_OscInitStruct.HSIState = RCC_HSI_ON;
RCC_OscInitStruct.HSICalibrationValue = RCC_HSICALIBRATION_DEFAULT;
RCC_OscInitStruct.PLL.PLLState = RCC_PLL_NONE;
if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK)
{
Error_Handler();
}
/** Initializes the CPU, AHB and APB buses clocks
*/
RCC_ClkInitStruct.ClockType = RCC_CLOCKTYPE_HCLK|RCC_CLOCKTYPE_SYSCLK
|RCC_CLOCKTYPE_PCLK1|RCC_CLOCKTYPE_PCLK2;
RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_HSI;
RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1;
RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV1;
RCC_ClkInitStruct.APB2CLKDivider = RCC_HCLK_DIV1;
if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_0) != HAL_OK)
{
Error_Handler();
}
}
/**
* @brief CORDIC Initialization Function
*
* Only enables the CORDIC peripheral clock on AHB1. The operating mode
* (function, precision, data sizes) is not set here; Setup() does that
* through LL_CORDIC_Config().
* @param None
* @retval None
*/
static void MX_CORDIC_Init(void)
{
/* USER CODE BEGIN CORDIC_Init 0 */
/* USER CODE END CORDIC_Init 0 */
/* Peripheral clock enable */
LL_AHB1_GRP1_EnableClock(LL_AHB1_GRP1_PERIPH_CORDIC);
/* USER CODE BEGIN CORDIC_Init 1 */
/* USER CODE END CORDIC_Init 1 */
/* nothing else to be configured */
/* USER CODE BEGIN CORDIC_Init 2 */
/* USER CODE END CORDIC_Init 2 */
}
/* USER CODE BEGIN 4 */
/* USER CODE END 4 */
/**
  * @brief  Terminal error trap: masks interrupts and spins forever.
  * @retval None (the function never returns)
  */
void Error_Handler(void)
{
  /* USER CODE BEGIN Error_Handler_Debug */
  /* There is no recovery path: mask interrupts, then park the CPU. */
  __disable_irq();
  for (;;)
  {
  }
  /* USER CODE END Error_Handler_Debug */
}
#ifdef USE_FULL_ASSERT
/**
  * @brief  Hook called when an assert_param check fails.
  *
  * Intentionally does nothing; a project-specific report of the location can
  * be added inside the user section.
  * @param  file: pointer to the source file name
  * @param  line: assert_param error line source number
  * @retval None
  */
void assert_failed(uint8_t *file, uint32_t line)
{
  /* USER CODE BEGIN 6 */
  /* Both arguments are deliberately unused. */
  (void)file;
  (void)line;
  /* USER CODE END 6 */
}
#endif /* USE_FULL_ASSERT */