关于嵌入式性能调优

最新推荐文章于 2023-07-04 13:56:14 发布

hustcc

最新推荐文章于 2023-07-04 13:56:14 发布

阅读量746

点赞数

分类专栏： C/C++ 文章标签：嵌入式 integer 优化 random 算法 fft

本文链接：https://blog.csdn.net/hustcc/article/details/3008058

版权

C/C++ 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

本来对嵌入式性能调优方面的一些文章还有些不以为然，但经过自己亲身体验之后，我不得不承认那些都是无比正确的。
对于性能问题，我一向的观点是，首先优化算法：能够查表的情况下就查表（如三角函数），能展开写的就不用循环，更不要用递归。然后是尽量减少内存的拷贝和分配，数据能共享的就共享，内存能一次分配好的就一次分配好。最后才是代码细节的优化。代码细节的优化大概有以下几点：
1、能预先算好的就预先算好。比如旋转变换的sin(angle)和cos(angle)就可以预先算好，而不必每个点都重新算一遍。
2、干掉所有浮点运算。当然，这是不可能的，我们只能做到在最花CPU时间的地方见不到浮点运算，这往往要结合第一点来实现：我们可以预先算好需要的参数，假如它是浮点数，那么可以将其乘以2^n（n取决于需要的精度）后取整，得到一个整数参数。之后在每个需要乘以原参数的地方，改为乘以这个整数参数，再把乘积右移n位（>>n）。例如参数为0.707、n取10时，四舍五入取整得到724，x*0.707就可以写成(x*724)>>10。
3、干掉所有除法。我们往往可以将除法转化为乘以除数的倒数（即一次浮点乘法），然后应用第二点，把它变成整数乘法加移位。
4、在可能的情况下，用移位代替乘除法。
5、赋值、比较等时候，数据类型要相符，再不济也要加上强制类型转换。
6、多用宏函数或内联函数。
下面是一些高效的算法：
一、平方根

 
 /* fsqrt.c
 *
 * A fast square root program adapted from the code of
 * Paul Lalonde and Robert Dawson in Graphics Gems I.
 * The format of IEEE double precision floating point numbers is:
 *
 * SEEEEEEEEEEEMMMM MMMMMMMMMMMMMMMM MMMMMMMMMMMMMMMM MMMMMMMMMMMMMMMM
 *
 * S = Sign bit for whole number
 * E = Exponent bit (exponent in excess 1023 form)
 * M = Mantissa bit
 */
#include <stdio.h>
#include <math.h>
/* MOST_SIG_OFFSET is the index, counted in unsigned ints from the start of
 * a double, of the 32-bit word that holds the sign and the exponent.
 * The right value depends on the target architecture, so check it for yours
 * (presumably 1 on little-endian machines and 0 on big-endian ones).
 */
#define MOST_SIG_OFFSET 1

/* SQRT_TAB_SIZE is the number of entries in the lookup table.
 * It must be a power of four.
 */
#define SQRT_TAB_SIZE 16384

/* MANT_SHIFTS is the right shift that moves the mantissa into position.
 * Quadrupling the table size means subtracting two from it,
 * quartering the table size means adding two.
 * Valid (size, shift) pairs: (16384, 7) (4096, 9) (1024, 11) (256, 13)
 */
#define MANT_SHIFTS 7

#define EXP_BIAS   1023              /* Exponents are always positive      */
#define EXP_SHIFTS 20                /* Shifts exponent to least sig. bits */
#define EXP_LSB    (1 << EXP_SHIFTS) /* Lowest exponent bit, 0x00100000    */
#define MANT_MASK  (EXP_LSB - 1)     /* Mask to extract mantissa, 0xFFFFF  */

/* Lookup table of square-root mantissas; filled in by init_sqrt_tab(). */
int sqrt_tab[SQRT_TAB_SIZE];
void
init_sqrt_tab()
{
        int           i;
        double        f;
        unsigned int  *fi = (unsigned int *) &f + MOST_SIG_OFFSET;
        
        for (i = 0; i < SQRT_TAB_SIZE/2; i++)
        {
                f = 0; /* Clears least sig part */
                *fi = (i << MANT_SHIFTS) | (EXP_BIAS << EXP_SHIFTS);
                f = sqrt(f);
                sqrt_tab[i] = *fi & MANT_MASK;
                f = 0; /* Clears least sig part */
                *fi = (i << MANT_SHIFTS) | ((EXP_BIAS + 1) << EXP_SHIFTS);
                f = sqrt(f);
                sqrt_tab[i + SQRT_TAB_SIZE/2] = *fi & MANT_MASK;
        }
}
double
fsqrt(f)
double f;
{
        unsigned int e;
        unsigned int   *fi = (unsigned int *) &f + MOST_SIG_OFFSET;
        if (f == 0.0) return(0.0);
        e = (*fi >> EXP_SHIFTS) - EXP_BIAS;
        *fi &= MANT_MASK;
        if (e & 1)
                *fi |= EXP_LSB;
        e >>= 1;
        *fi = (sqrt_tab[*fi >> MANT_SHIFTS]) |
              ((e + EXP_BIAS) << EXP_SHIFTS);
        return(f);
}
/*
 * Print the contents of sqrt_tab to stdout in the form of a C array
 * definition, nine entries per line.
 */
void
dump_sqrt_tab(void)
{
        int        i, nl = 0;

        /* The escapes must be "\n"; "/n" prints a literal slash and 'n'. */
        printf("unsigned int sqrt_tab[] = {\n");
        for (i = 0; i < SQRT_TAB_SIZE-1; i++)
        {
                /* %x expects an unsigned int argument. */
                printf("0x%x,", (unsigned int) sqrt_tab[i]);
                nl++;
                if (nl > 8) { nl = 0; putchar('\n'); }
        }
        /* The last entry is printed without a trailing comma. */
        printf("0x%x\n", (unsigned int) sqrt_tab[SQRT_TAB_SIZE-1]);
        printf("};\n");
}
 

二、距离计算

 
 //
//
// Approximates the distance from (0,0) to (x,y) using only integer
// additions and shifts.
//
// With mx = max(|x|,|y|) and mn = min(|x|,|y|) the result is roughly
// mx + (5/16)*mn; each shift truncates.  It is exact when one coordinate
// is 0 and is lowest relative to the true value on the diagonal, e.g.
// (100,100) yields 131 where the true distance is about 141.4.
//
// The function is self-contained: it needs neither a MIN macro nor abs().
// As with abs(), passing INT_MIN is not supported.
//
int FastDistance2D(int x, int y)
{
    int mn;

    // Work on magnitudes.
    if (x < 0) x = -x;
    if (y < 0) y = -y;

    mn = (x < y) ? x : y;
    return(x+y-(mn>>1)-(mn>>2)+(mn>>4));
}
//
// Approximates the distance from (0,0,0) to (fx,fy,fz).
//
// The magnitudes are converted to fixed point with 10 fractional bits,
// sorted so that x <= y <= z, and combined as z + (11/32)*y + (1/4)*x.
// The source this came from quotes a relative error of about 8% for that
// formula.
//
// NOTE(review): the fixed-point sum is shifted right by 10 before the
// conversion to float, so the returned value is always a whole number
// (e.g. (1,1,1) yields 1.0).  Kept as-is to preserve existing results;
// confirm whether callers want the fractional part.
//
// Magnitudes must be small enough that |f| * 1024 fits in an int.
//
float FastDistance3D(float fx, float fy, float fz)
{
    int temp;
    int x,y,z;
    int dist;

    // Make all values positive, scaled by 2^10.
    x = (int)(fabs(fx) * 1024);
    y = (int)(fabs(fy) * 1024);
    z = (int)(fabs(fz) * 1024);

    // Three-element sort: afterwards x <= y <= z.
    if (y < x) { temp = x; x = y; y = temp; }
    if (z < y) { temp = y; y = z; z = temp; }
    if (y < x) { temp = x; x = y; y = temp; }

    dist = (z + 11 * (y >> 5) + (x >> 2) );
    return((float)(dist >> 10));
}
 

三、随机数发生

 
 /*
  * 32-bit pseudo-random number generator: a C version of the additive
  * generator given by Knuth, The Art of Computer Programming, vol. 2,
  * pp. 26-27.  With e = 32 the recurrence is
  *
  *     y(n) = y(n - 24) + y(n - 55)  mod 2^32
  *
  * The state lives in function-local statics and is seeded on the first
  * call, so every program run produces the same sequence.
  */
unsigned int Random32(void) {
  enum { LONG_LAG = 55, SHORT_LAG = 24 };

  /*
   * Seed values: random numbers from Mathematica 2.0,
   * SeedRandom = 1; Table[Random[Integer, {0, 2^32 - 1}]
   */
  static const unsigned long seed[LONG_LAG] = {
    1410651636UL, 3012776752UL, 3497475623UL, 2892145026UL, 1571949714UL,
    3253082284UL, 3489895018UL, 387949491UL, 2597396737UL, 1981903553UL,
    3160251843UL, 129444464UL, 1851443344UL, 4156445905UL, 224604922UL,
    1455067070UL, 3953493484UL, 1460937157UL, 2528362617UL, 317430674UL,
    3229354360UL, 117491133UL, 832845075UL, 1961600170UL, 1321557429UL,
    747750121UL, 545747446UL, 810476036UL, 503334515UL, 4088144633UL,
    2824216555UL, 3738252341UL, 3493754131UL, 3672533954UL, 29494241UL,
    1180928407UL, 4213624418UL, 33062851UL, 3221315737UL, 1145213552UL,
    2957984897UL, 4078668503UL, 2262661702UL, 65478801UL, 2527208841UL,
    1960622036UL, 315685891UL, 1196037864UL, 804614524UL, 1421733266UL,
    2017105031UL, 3882325900UL, 810735053UL, 384606609UL, 2393861397UL };
  static unsigned long ring[LONG_LAG];
  static int need_seed = 1;
  static int near_tap, far_tap;
  unsigned long sum;

  /* First call: load the seed table and place both taps. */
  if (need_seed) {
    int n = LONG_LAG;

    while (n-- > 0) {
      ring[n] = seed[n];
    }
    near_tap = SHORT_LAG - 1;
    far_tap = LONG_LAG - 1;
    need_seed = 0;
  }

  /* The new value overwrites the oldest slot. */
  ring[far_tap] += ring[near_tap];
  sum = ring[far_tap];

  /* Step both taps backwards, wrapping to the end of the ring. */
  near_tap = (near_tap > 0) ? near_tap - 1 : LONG_LAG - 1;
  far_tap = (far_tap > 0) ? far_tap - 1 : LONG_LAG - 1;

  /* The conversion to unsigned int keeps the low-order bits of the sum. */
  return (unsigned int)sum;
}
 

四、FFT变换

 
 /*
This computes an in-place complex-to-complex FFT
x and y are the real and imaginary arrays of n=2^m points. 
o(n)=n*log2(n) 
dir =  1 gives forward transform 
dir = -1 gives reverse transform  
FFT algorithm by Cooley and Tukey, 1965
*/
bool _stdcall FFT(int dir,int m,double *x,double *y)
{
    long nn,i,i1,j,k,i2,l,l1,l2; 
    double c1,c2,tx,ty,t1,t2,u1,u2,z; 
    
    /* Calculate the number of points */
    nn = 1<<m; 
    
    /* Do the bit reversal */ 
    i2 = nn >> 1; 
    j = 0; 
    for (i=0;i<nn-1;i++) { 
        if (i < j) { 
            tx = x[i]; 
            ty = y[i]; 
            x[i] = x[j]; 
            y[i] = y[j]; 
            x[j] = tx; 
            y[j] = ty; 
        } 
        k = i2; 
        while (k <= j) { 
            j -= k; 
            k >>= 1; 
        } 
        j += k; 
    } 
    
    /* Compute the FFT */ 
    c1 = -1.0; 
    c2 = 0.0; 
    l2 = 1; 
    for (l=0;l<m;l++) { 
        l1 = l2; 
        l2 <<= 1; 
        u1 = 1.0; 
        u2 = 0.0; 
        for (j=0;j<l1;j++) { 
            for (i=j;i<nn;i+=l2) { 
                i1 = i + l1; 
                t1 = u1 * x[i1] - u2 * y[i1]; 
                t2 = u1 * y[i1] + u2 * x[i1]; 
                x[i1] = x[i] - t1; 
                y[i1] = y[i] - t2; 
                x[i] += t1; 
                y[i] += t2; 
            } 
            z =  u1 * c1 - u2 * c2; 
            u2 = u1 * c2 + u2 * c1; 
            u1 = z; 
        } 
        c2 = sqrt((1.0 - c1) / 2.0); 
        if (dir == 1) 
            c2 = -c2; 
        c1 = sqrt((1.0 + c1) / 2.0); 
    } 
    
    /* Scaling for forward transform */ 
    if (dir == 1) { 
        for (i=0;i<nn;i++) { 
            x[i] /= (double)nn; 
            y[i] /= (double)nn; 
        }
    } 
    
    return true; 
}