CORDIC算法与泰勒级数计算三角函数sin,cos的速度与精度对比

磁悬浮青蛙呱呱呱

已于 2024-05-09 08:43:52 修改

阅读量300

点赞数 1

文章标签： fpga开发 CORDIC算法泰勒级数

于 2024-05-09 01:41:14 首次发布

本文链接：https://blog.csdn.net/pijianzhirui/article/details/138591884

版权

在学习CORDIC(Coordinate Rotation Digital Computer)算法的时候，最主要的疑问是：浮点数怎么做移位运算？如果CORDIC算法中不使用移位运算，而是直接使用浮点数乘法(除以浮点常数的除法会被C++编译器优化为乘法)，那么CORDIC算法的浮点乘法次数大大超过同精度的泰勒级数，完全失去了加快计算速度的意义。本文只考虑角度在(0, pi/2)内的情形，使用整型变量实现CORDIC算法(FPGA程序中通常使用定点数实现)，与浮点数实现的泰勒级数算法进行速度与精度的对比，只在电脑CPU进行了测试。因为电脑CPU都有专门的浮点运算单元(FPU)，所以浮点乘法相较于移位运算，耗时并没有长太多。下面的测试表明，在电脑CPU上，泰勒级数的精度和速度都大大超过了CORDIC算法。本文中的泰勒级数算法(还缺少对任意角的处理)的速度和精度已经十分接近math.h标准库中的函数了。廉价单片机和计算器都没有FPU，浮点乘法速度非常慢，有较大可能应该采用CORDIC算法。

#include <stdio.h>
#include <cstdint>
#include <math.h>
#include <time.h>

// #define SINGLE_PRECISION

// K=1/(sqrt(1+1)*sqrt(1+1/4)*sqrt(1+1/16)*sqrt(1+1/64)...) 是一个无穷乘积
// K=0.6072529350088812561694467525049282631124...
// 在单精度情形，先将K * 10^9化成整数(要确保不超过2^32 =4294967296)，
// 再进行加减法和移位运算，最后乘上1e-9化为浮点数。双精度情形类似。

#ifdef SINGLE_PRECISION
typedef float real;
typedef int32_t integer;
constexpr int32_t K = 607252935;
//             2^32 =4294967296
constexpr float scale = 1e-9;
constexpr uint8_t numIter = 32;  //设得更大就会出错
#else
typedef double real;
typedef int64_t integer;
constexpr int64_t K =  607252935008881256;
//             2^64 =18446744073709551616
constexpr double scale = 1e-18;
constexpr uint8_t numIter = 40;
#endif

// 如果constexpr int64_t K =  6072529350088812561;
//                     2^64 =18446744073709551616
//                     2^63 = 9223372036854775808
// constexpr double scale = 1e-19;
// 
// 则Q=1.5时，迭代中会出现x,y超过2^63的情况，导致溢出
// 把int64_t改为uint64_t也不行，Q=1.5时，会出现小数减大数的情况，导致错误。


// atan(1/2), atan(1/4), atan(1/8), atan(1/16), atan(1/32), atan(1/64), ...
constexpr real angles[41] = {
0.785398163397448, 0.463647609000806, 0.244978663126864, 0.124354994546761,
0.062418809995957, 0.031239833430268, 0.015623728620477, 0.007812341060101,
0.003906230131967, 0.001953122516479, 0.000976562189559, 0.000488281211195,
0.000244140620149, 0.000122070311894, 0.000061035156174, 0.000030517578116,
0.000015258789061, 0.000007629394531, 0.000003814697266, 0.000001907348633,
0.000000953674316, 0.000000476837158, 0.000000238418579, 0.000000119209290,
0.000000059604645, 0.000000029802322, 0.000000014901161, 0.000000007450581,
0.000000003725290, 0.000000001862645, 0.000000000931323, 0.000000000465661,
0.000000000232831, 0.000000000116415, 0.000000000058208, 0.000000000029104,
0.000000000014552, 0.000000000007276, 0.000000000003638, 0.000000000001819,
0.000000000000909 };


void cordic(real Q, real* sinQ, real* cosQ) {     // Q的单位是弧度
	integer x = K, y = 0, temp;
	real  q = 0;
	for (uint8_t n = 0; n < numIter; n++) {
		temp = x;
		if (q < Q) {
			temp = x - (y >> n);
			y = (x >> n) + y;
			x = temp;
			q += angles[n];
		}
		else {
			temp = x + (y >> n);
			y = y - (x >> n);
			x = temp;
			q -= angles[n];
		}
	}
	*cosQ = x * scale;
	*sinQ = y * scale;
}

real taylor_cos(real Q) {
	uint8_t n = 0;
	while (Q > 0.2) {  //  |Q|足够小才能保证taylor级数的精度
		Q *= 0.5;
		n++;
	}
	real cosQ = 0.0;
	real Q2 = Q * Q;
	//cosQ = cosQ * Q2 + 1.0 / 20922789888000.0;    // 16次方
	//cosQ = cosQ * Q2 - 1.0 / 87178291200.0;       // 14
	//cosQ = cosQ * Q2 + 1.0 / 479001600.0;         // 12，更高次方无法提高精度
	cosQ = cosQ * Q2 - 1.0 / 3628800.0;             // 10
	cosQ = cosQ * Q2 + 1.0 / 40320.0;               // 8
	cosQ = cosQ * Q2 - 1.0 / 720.0;
	cosQ = cosQ * Q2 + 1.0 / 24.0;
	cosQ = cosQ * Q2 - 1.0 / 2.0;
	cosQ = cosQ * Q2 + 1.0;
	while (n > 0) { // 二倍角公式 cos(2Q) = 2cos^2(Q) - 1
		cosQ = 2.0 * cosQ * cosQ - 1.0;
		n--;
	}
	return cosQ;
}

real taylor_sin(real Q) {
	if (Q > 0.2) {
		return taylor_cos(1.570796326794897 - Q);
	}
	//uint8_t n = 0;
	//while (Q > 0.2) {  //  |Q|足够小才能保证taylor级数的精度
	//	Q *= 0.5;
	//	n++;
	//}
	real sinQ = 0.0;
	real Q2 = Q * Q;
	//sinQ = sinQ * Q2 + 1.0 / 355687428096000.0;   // 17次方
	//sinQ = sinQ * Q2 - 1.0 / 1307674368000.0;     // 15
	//sinQ = sinQ * Q2 + 1.0 / 6227020800.0;        // 13
	sinQ = sinQ * Q2 - 1.0 / 39916800.0;            // 11
	sinQ = sinQ * Q2 + 1.0 / 362880.0;              // 9
	sinQ = sinQ * Q2 - 1.0 / 5040.0;
	sinQ = sinQ * Q2 + 1.0 / 120.0;
	sinQ = sinQ * Q2 - 1.0 / 6.0;
	sinQ = sinQ * Q2 + 1.0;
	sinQ = sinQ * Q;
	//while (n > 0) {  // 二倍角公式 sin(2Q) = 2sin(Q)cos(Q)
	//	sinQ = 2.0 * sinQ * sqrt(1 - sinQ * sinQ);
	//	n--;       // 然而sqrt函数计算代价偏大，所以不宜使用正弦二倍角公式，
	//}            // 不如利用sin(Q)=cos(pi/2 - Q)，用taylor_cos函数计算
	return sinQ;
}

//real taylor_sin(real Q) {  
//  如果直接这么写，那么在|Q|<0.2时精度不好，应该使用sin(x)在x=0附近的泰勒级数
//	return taylor_cos(1.570796326794897 - Q);
//}

int main() {
	real Q, sinQ, cosQ;
	printf("精度测试\n");
	for (uint8_t n = 1; n < 16; n++) {
		Q = 0.1 * n;
		cordic(Q, &sinQ, &cosQ);
		printf("    Q = %f\n", Q);
		printf("CORDIC: %16.15f, %16.15f\n", sinQ, cosQ);
		printf("taylor: %16.15f, %16.15f\n", taylor_sin(Q), taylor_cos(Q));
		printf("math.h: %16.15f, %16.15f\n", sin(Q), cos(Q));
		printf("---------------------------------\n");
	}
	printf("速度测试，编译器优化设为/O2 \n");
	clock_t start = clock();
	double sum = 0;
	const uint64_t N = 5e7;
	const double c = 1.57 / N;
	for (uint64_t i = 1; i < N; i++) {
		Q = c * i;
		cordic(Q, &sinQ, &cosQ);
		sum += cosQ;
	}
	printf("sum=%f,  CORDIC_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC);
	
	start = clock();
	sum = 0;
	for (uint64_t i = 1; i < N; i++) {
		Q = c * i;
		sum += taylor_cos(Q);
	}
	printf("sum=%f,  Taylor_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC);
	
	start = clock();
	sum = 0;
	for (uint64_t i = 1; i < N; i++) {
		Q = c * i;
		sum += cos(Q);
	}
	printf("sum=%f,  math.h_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC);

	return 0;
}

CPU：Intel Core i7 12700H, 开发环境VS2022，使用双精度计算时，输出结果如下：

精度测试
    Q = 0.100000
CORDIC: 0.099833416646617, 0.995004165278047
taylor: 0.099833416646828, 0.995004165278026
math.h: 0.099833416646828, 0.995004165278026
---------------------------------
    Q = 0.200000
CORDIC: 0.198669330794285, 0.980066577841399
taylor: 0.198669330795061, 0.980066577841242
math.h: 0.198669330795061, 0.980066577841242
---------------------------------
    Q = 0.300000
CORDIC: 0.295520206662469, 0.955336489125257
taylor: 0.295520206661338, 0.955336489125606
math.h: 0.295520206661340, 0.955336489125606
---------------------------------
    Q = 0.400000
CORDIC: 0.389418342309467, 0.921060994002540
taylor: 0.389418342308649, 0.921060994002885
math.h: 0.389418342308651, 0.921060994002885
---------------------------------
    Q = 0.500000
CORDIC: 0.479425538604363, 0.877582561890286
taylor: 0.479425538604200, 0.877582561890372
math.h: 0.479425538604203, 0.877582561890373
---------------------------------
    Q = 0.600000
CORDIC: 0.564642473394510, 0.825335614910038
taylor: 0.564642473395032, 0.825335614909677
math.h: 0.564642473395035, 0.825335614909678
---------------------------------
    Q = 0.700000
CORDIC: 0.644217687238764, 0.764842187283585
taylor: 0.644217687237689, 0.764842187284488
math.h: 0.644217687237691, 0.764842187284488
---------------------------------
    Q = 0.800000
CORDIC: 0.717356090900460, 0.696706709346200
taylor: 0.717356090899523, 0.696706709347165
math.h: 0.717356090899523, 0.696706709347165
---------------------------------
    Q = 0.900000
CORDIC: 0.783326909627326, 0.621609968270863
taylor: 0.783326909627483, 0.621609968270663
math.h: 0.783326909627483, 0.621609968270664
---------------------------------
    Q = 1.000000
CORDIC: 0.841470984807630, 0.540302305868554
taylor: 0.841470984807897, 0.540302305868138
math.h: 0.841470984807897, 0.540302305868140
---------------------------------
    Q = 1.100000
CORDIC: 0.891207360060892, 0.453596121426645
taylor: 0.891207360061435, 0.453596121425579
math.h: 0.891207360061435, 0.453596121425577
---------------------------------
    Q = 1.200000
CORDIC: 0.932039085967490, 0.362357754475995
taylor: 0.932039085967226, 0.362357754476670
math.h: 0.932039085967226, 0.362357754476673
---------------------------------
    Q = 1.300000
CORDIC: 0.963558185417581, 0.267498828623190
taylor: 0.963558185417193, 0.267498828624585
math.h: 0.963558185417193, 0.267498828624587
---------------------------------
    Q = 1.400000
CORDIC: 0.985449729988356, 0.169967142900846
taylor: 0.985449729988460, 0.169967142900239
math.h: 0.985449729988460, 0.169967142900241
---------------------------------
    Q = 1.500000
CORDIC: 0.997494986603962, 0.070737201669002
taylor: 0.997494986604054, 0.070737201667705
math.h: 0.997494986604054, 0.070737201667703
---------------------------------
速度测试，编译器优化设为/O2
sum=31847123.159845,  CORDIC_Time: 5.055000s
sum=31847123.159845,  Taylor_Time: 0.196000s
sum=31847123.159845,  math.h_Time: 0.170000s

磁悬浮青蛙呱呱呱

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
CORDIC算法与泰勒级数计算三角函数sin,cos的速度与精度对比

如果CORDIC算法中不使用移位运算，而是直接使用浮点数乘法(除以浮点常数的除法会被C++编译器优化为乘法)，那么CORDIC算法的浮点乘法次数大大超过同精度的泰勒级数，完全失去了加快计算速度的意义。本文只考虑角度在(0, pi/2)内的情形，使用整型变量实现CORDIC算法(FPGA程序中通常使用定点数实现)，与浮点数实现的泰勒级数算法进行速度与精度的对比，只在电脑CPU进行了测试。下面的测试表明，在电脑CPU上，泰勒级数的精度和速度都大大超过了CORDIC算法。
复制链接

扫一扫