1、空间开销
如果问大家sizeof(char)等于多少,大家一定都知道等于1字节。那么如果使用new运算法分别动态分配10个char变量,那么这10个char变量所占的内存空间就仅仅只有10字节吗?并不是!使用malloc动态分配可能会需要好几倍的额外开销。
根据书中给出的测试程序,程序中定义了一个宏,在该宏定义中,首先给出相应结构的sizeof()信息,然后用类似下面的形式给出new分配的字节数的估计:
(我在我的计算机上进行了测试,计算机配置CPU Xeon E5603,内存8G)
从上述结果看出,动态分配1个字节的变量,可能会需要消耗16个字节的内存,并且分配的块大小一般为8的倍数,这告诉我们再估计程序的内存需求时一定要注意动态分配时候所需要的额外内存开销!
2、时间开销
“你的计算机中执行一个整形加法需要多少时间?””执行一个判断语句需要多少时间?“...问你这些问题,你或许只会说,”不知道,我就知道需要很短的时间,除法时间最长,等等“。
我们在对程序做代码调优的时候,如果能对所写代码所需的执行时间有个粗略的估计,然后找到耗时的代码,用其他省时的算法及代码来代替,就可以此来提高程序的执行效率。
根据书中的测试程序,在我的计算机上,我得到了如下一些结果:
(1)整形运算
第一行说明执行空循环需要5纳秒,下一行说明变量k自增居然比空语句还快(实践才是真理啊!)。除了除法和取模运算的开销高一倍外,所有算术和逻辑运算所需的开销基本相同,4至5秒。
(2)浮点运算与数组运算
可以看到浮点数运算与整形运算的开销基本相同,只是浮点数除法运算需要更高的开销。数组的时间开销也几乎相同,在4、5纳秒之间。
(3)一般控制流和一些特定排序操作
比较和交换操作的函数版本比内联版本的开销多4纳秒。下面是取两个值中最大值的函数、宏和内联三个版本开销:
最后的函数版本明显比宏和内联版本的开销高出一倍。
(4)rand和数学计算
rand函数和开方运算的开销相对较小,只比基本算术运算(<10纳秒)大一个数量级,需要20多纳秒(>20纳秒)。而其他简单的三角函数运算则比基本算术运算大出两个数量级,需要几百纳秒。其中双曲正弦函数sinh开销最多。
(5)内存分配
malloc和free等内存分配的开销也很大,需要几百纳秒(>100纳秒)。
总结:1、基本代码操作中,开销最大的是内存分配和三角函数运算,需要百纳秒级的开销,而其他基本的算术操作只需十纳秒级的开销。当然这只是在我个人计算机上的测试结果。2、函数运算明显比内联运算开销要大,若是函数体比较简单,则函数额外开销会占总时长的很大比例。
3、测试代码
(1)空间开销测试
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* spacemod.cpp -- simple model for C++ space */
#include <iostream>
using namespace std;
#define MEASURE(T, text) { \
cout << text << "\t"; \
cout << sizeof(T) << "\t"; \
int lastp = 0; \
for (int i = 0; i < 11; i++) { \
T *p = new T; \
int thisp = (int) p; \
if (lastp != 0) \
cout << " " << thisp - lastp; \
lastp = thisp; \
} \
cout << "\n"; \
}
// Must use macros; templates give funny answers
template <class T>
void measure(char *text)
{ cout << " measure: " << text << "\t";
cout << sizeof(T) << "\n";
}
struct structc { char c; };
struct structic { int i; char c; };
struct structip { int i; structip *p; };
struct structdc { double d; char c; };
struct structcd { char c; double d; };
struct structcdc { char c1; double d; char c2; };
struct structiii { int i1; int i2; int i3; };
struct structiic { int i1; int i2; char c; };
struct structc12 { char c[12]; };
struct structc13 { char c[13]; };
struct structc28 { char c[28]; };
struct structc29 { char c[29]; };
struct structc44 { char c[44]; };
struct structc45 { char c[45]; };
struct structc60 { char c[60]; };
struct structc61 { char c[61]; };
int main()
{ cout << "Raw sizeof";
cout << "\nsizeof(char)=" << sizeof(char);
cout << " sizeof(short)=" << sizeof(short);
cout << " sizeof(int)=" << sizeof(int);
cout << "\nsizeof(float)=" << sizeof(float);
cout << " sizeof(struct *)=" << sizeof(structc *);
cout << " sizeof(long)=" << sizeof(long);
cout << "\nsizeof(double)=" << sizeof(double);
cout << "\n\nMEASURE macro\n";
MEASURE(int, "int");
MEASURE(structc, "structc");
MEASURE(structic, "structic");
MEASURE(structip, "structip");
MEASURE(structdc, "structdc");
MEASURE(structcd, "structcd");
MEASURE(structcdc, "structcdc");
MEASURE(structiii, "structiii");
MEASURE(structiic, "structiic");
MEASURE(structc12, "structc12");
MEASURE(structc13, "structc13");
MEASURE(structc28, "structc28");
MEASURE(structc29, "structc29");
MEASURE(structc44, "structc44");
MEASURE(structc45, "structc45");
MEASURE(structc60, "structc60");
MEASURE(structc61, "structc61");
cout << "\nmeasure template (strange results)\n";
// Uncomment below lines to see answers change
measure<int>("int");
// measure<structc>("structc");
// measure<structic>("structic");
return 0;
}
(2)时间开销测试
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* timemod.c -- Produce table of C run time costs */
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <math.h>
#define MAXN 100000
int x[MAXN];
int startn = 5000;
int n;
/* FUNCTIONS TO BE TIMED */
int intcmp(int *i, int *j)
{ return *i - *j; }
#define swapmac(i, j) { t = x[i]; x[i] = x[j]; x[j] = t; }
void swapfunc(int i, int j)
{ int t = x[i];
x[i] = x[j];
x[j] = t;
}
#define maxmac(a, b) ((a) > (b) ? (a) : (b))
int maxfunc(int a, int b)
{ return a > b ? a : b; }
/* WORKHORSE */
#define T(s) printf("%s (n=%d)\n", s, n);
#define TRIALS 5
#define M(op) \
printf(" %-22s", #op); \
k = 0; \
timesum = 0; \
for (ex = 0; ex < TRIALS; ex++) { \
start = clock(); \
for (i = 1; i <= n; i++) { \
fi = (float) i; \
for (j = 1; j <= n; j++) { \
op; \
} \
} \
t = clock()-start; \
printf("%6d", t); \
timesum += t; \
} \
nans = 1e9 * timesum / ((double) \
n*n * TRIALS * CLOCKS_PER_SEC); \
printf("%8.0f\n", nans);
int main()
{ int i, j, k;
float fi, fj, fk;
int t, ex, timesum, start, globalstart;
double nans;
globalstart = clock();
for (i = 0; i < n; i++)
x[i] = rand();
n = startn;
printf("C Time Cost Model, n=%d\n", n);
T("Integer Arithmetic");
M({});
M(k++);
M(k = i + j);
M(k = i - j);
M(k = i * j);
M(k = i / j);
M(k = i % j);
M(k = i & j);
M(k = i | j);
T("Floating Point Arithmetic");
M(fj=j;);
M(fj=j; fk = fi + fj;);
M(fj=j; fk = fi - fj;);
M(fj=j; fk = fi * fj;);
M(fj=j; fk = fi / fj;);
T("Array Operations");
M(k = i + j);
M(k = x[i] + j);
M(k = i + x[j]);
M(k = x[i] + x[j]);
T("Comparisons");
M(if (i < j) k++);
M(if (x[i] < x[j]) k++);
T("Array Comparisons and Swaps");
M(k = (x[i]<x[k]) ? -1:1);
M(k = intcmp(x+i, x+j));
M(swapmac(i, j));
M(swapfunc(i, j));
T("Max Function, Macro and Inline");
M(k = (i > j) ? i : j);
M(k = maxmac(i, j));
M(k = maxfunc(i, j));
n = startn / 5;
T("Math Functions");
M(fk = j+fi;);
M(k = rand(););
M(fk = sqrt(j+fi));
M(fk = sin(j+fi));
M(fk = sinh(j+fi));
M(fk = asin(j+fi));
M(fk = cos(j+fi));
M(fk = tan(j+fi));
n = startn / 10;
T("Memory Allocation");
M(free(malloc(16)););
M(free(malloc(100)););
M(free(malloc(2000)););
/* Possible additions: input, output, malloc */
printf(" Secs: %4.2f\n",
((double) clock()-globalstart)
/ CLOCKS_PER_SEC);
return 0;
}