#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
/* Matrix dimension (rows == columns); the default is replaced by the value read from stdin. */
int MAX_SIZE = 10240;

/* Print both fields of a gettimeofday() sample, prefixed with the given label. */
static void print_stamp(const char *label, const struct timeval *tv)
{
    /* time_t and suseconds_t are not int: cast to long so the specifier matches. */
    printf("%s.tv_sec:%ld\n", label, (long)tv->tv_sec);
    printf("%s.tv_usec:%ld\n", label, (long)tv->tv_usec);
}

/* Microseconds elapsed between two gettimeofday() samples. */
static double elapsed_usec(const struct timeval *from, const struct timeval *to)
{
    return (double)(to->tv_sec - from->tv_sec) * 1000000.0
           + (double)(to->tv_usec - from->tv_usec);
}

/*
 * Cache-locality demo: fills a MAX_SIZE x MAX_SIZE matrix twice, first in
 * row-major order (array[i][j]) and then in column-major order (array[j][i]),
 * printing the wall-clock time of each pass in microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(void)
{
    struct timeval start;
    struct timeval end;
    int **array = NULL;
    int i, j;
    /* Unsigned so that wrap-around for very large sizes is well defined. */
    unsigned int num = 0;
    int rc = EXIT_FAILURE;

    printf("请输入行数\n");
    if (scanf("%d", &MAX_SIZE) != 1 || MAX_SIZE <= 0) {
        fprintf(stderr, "invalid size: expected a positive integer\n");
        return EXIT_FAILURE;
    }

    /* Zero-initialised row table so the cleanup path can free() every slot. */
    array = calloc((size_t)MAX_SIZE, sizeof *array);
    if (array == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < MAX_SIZE; i++) {
        array[i] = malloc(sizeof **array * (size_t)MAX_SIZE);
        if (array[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            goto cleanup;
        }
    }

    /* Pass 1: row-major order, consecutive writes land in the same row buffer. */
    gettimeofday(&start, NULL); /* passing a timezone argument instead of NULL gives the same result */
    print_stamp("start", &start);
    for (i = 0; i < MAX_SIZE; i++) {
        for (j = 0; j < MAX_SIZE; j++) {
            array[i][j] = (int)num++;
        }
    }
    gettimeofday(&end, NULL);
    print_stamp("end", &end);
    printf("time_use is %f\n", elapsed_usec(&start, &end)); /* microseconds */

    /* Pass 2: column-major order, every write touches a different row buffer. */
    num = 0;
    gettimeofday(&start, NULL);
    print_stamp("start", &start);
    for (i = 0; i < MAX_SIZE; i++) {
        for (j = 0; j < MAX_SIZE; j++) {
            array[j][i] = (int)num++;
        }
    }
    gettimeofday(&end, NULL);
    print_stamp("end", &end);
    printf("time_use is %f\n", elapsed_usec(&start, &end)); /* microseconds */

    rc = 0;

cleanup:
    for (i = 0; i < MAX_SIZE; i++) {
        free(array[i]);
    }
    free(array);
    return rc;
}
测试结果:
两个循环的耗时相差约一倍。第二个循环按列访问(array[j][i]),相邻两次写入落在不同的行,访问的内存不连续,导致大量 cache miss,需要重新从内存获取数据,因此性能比较差。
valgrind 统计性能数据
存在 cache miss 的程序(按列访问 array[j][i]):
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
/* Matrix dimension (rows == columns); the default is replaced by the value read from stdin. */
int MAX_SIZE = 10240;

/*
 * Cache-unfriendly variant: fills a MAX_SIZE x MAX_SIZE matrix in
 * column-major order (array[j][i]) and prints the wall-clock time of the
 * fill in microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(void)
{
    /* double keeps microsecond counts exact; float loses precision past ~16.7 s. */
    double time_use = 0;
    struct timeval start;
    struct timeval end;
    int **array = NULL;
    int i, j;
    /* Unsigned so that wrap-around for very large sizes is well defined. */
    unsigned int num = 0;
    int rc = EXIT_FAILURE;

    printf("请输入行数\n");
    if (scanf("%d", &MAX_SIZE) != 1 || MAX_SIZE <= 0) {
        fprintf(stderr, "invalid size: expected a positive integer\n");
        return EXIT_FAILURE;
    }

    /* Zero-initialised row table so the cleanup path can free() every slot. */
    array = calloc((size_t)MAX_SIZE, sizeof *array);
    if (array == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < MAX_SIZE; i++) {
        array[i] = malloc(sizeof **array * (size_t)MAX_SIZE);
        if (array[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            goto cleanup;
        }
    }

    gettimeofday(&start, NULL); /* passing a timezone argument instead of NULL gives the same result */
    /* time_t and suseconds_t are not int: cast to long so the specifier matches. */
    printf("start.tv_sec:%ld\n", (long)start.tv_sec);
    printf("start.tv_usec:%ld\n", (long)start.tv_usec);

    /* Column-major order: every write touches a different row buffer. */
    for (i = 0; i < MAX_SIZE; i++) {
        for (j = 0; j < MAX_SIZE; j++) {
            array[j][i] = (int)num++;
        }
    }

    gettimeofday(&end, NULL);
    printf("end.tv_sec:%ld\n", (long)end.tv_sec);
    printf("end.tv_usec:%ld\n", (long)end.tv_usec);

    /* Elapsed time in microseconds. */
    time_use = (double)(end.tv_sec - start.tv_sec) * 1000000.0
               + (double)(end.tv_usec - start.tv_usec);
    printf("time_use is %f\n", time_use);

    rc = 0;

cleanup:
    for (i = 0; i < MAX_SIZE; i++) {
        free(array[i]);
    }
    free(array);
    return rc;
}
测试结果
x03430-a@x03430-a:~/test/dpdk$ valgrind --tool=cachegrind ./cache_test2
==16617== Cachegrind, a cache and branch-prediction profiler
==16617== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.
==16617== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==16617== Command: ./cache_test2
==16617==
--16617-- warning: L3 cache found, using its data for the LL simulation.
请输入行数
1024
start.tv_sec:1604299021
start.tv_usec:132371
end.tv_sec:1604299021
end.tv_usec:214520
time_use is 82149.000000
==16617==
==16617== I refs: 19,310,723
==16617== I1 misses: 1,291
==16617== LLi misses: 1,252
==16617== I1 miss rate: 0.01%
==16617== LLi miss rate: 0.01%
==16617==
==16617== D refs: 10,629,320 (8,483,206 rd + 2,146,114 wr)
==16617== D1 misses: 1,184,197 ( 133,778 rd + 1,050,419 wr)
==16617== LLd misses: 68,554 ( 2,073 rd + 66,481 wr)
==16617== D1 miss rate: 11.1% ( 1.6% + 48.9% )
==16617== LLd miss rate: 0.6% ( 0.0% + 3.1% )
==16617==
==16617== LL refs: 1,185,488 ( 135,069 rd + 1,050,419 wr)
==16617== LL misses: 69,806 ( 3,325 rd + 66,481 wr)
==16617== LL miss rate: 0.2% ( 0.0% + 3.1% )
正常代码(按行访问 array[i][j]):
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
/* Matrix dimension (rows == columns); the default is replaced by the value read from stdin. */
int MAX_SIZE = 10240;

/*
 * Cache-friendly variant: fills a MAX_SIZE x MAX_SIZE matrix in row-major
 * order (array[i][j]) and prints the wall-clock time of the fill in
 * microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(void)
{
    /* double keeps microsecond counts exact; float loses precision past ~16.7 s. */
    double time_use = 0;
    struct timeval start;
    struct timeval end;
    int **array = NULL;
    int i, j;
    /* Unsigned so that wrap-around for very large sizes is well defined. */
    unsigned int num = 0;
    int rc = EXIT_FAILURE;

    printf("请输入行数\n");
    if (scanf("%d", &MAX_SIZE) != 1 || MAX_SIZE <= 0) {
        fprintf(stderr, "invalid size: expected a positive integer\n");
        return EXIT_FAILURE;
    }

    /* Zero-initialised row table so the cleanup path can free() every slot. */
    array = calloc((size_t)MAX_SIZE, sizeof *array);
    if (array == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < MAX_SIZE; i++) {
        array[i] = malloc(sizeof **array * (size_t)MAX_SIZE);
        if (array[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            goto cleanup;
        }
    }

    gettimeofday(&start, NULL); /* passing a timezone argument instead of NULL gives the same result */
    /* time_t and suseconds_t are not int: cast to long so the specifier matches. */
    printf("start.tv_sec:%ld\n", (long)start.tv_sec);
    printf("start.tv_usec:%ld\n", (long)start.tv_usec);

    /* Row-major order: consecutive writes land in the same row buffer. */
    for (i = 0; i < MAX_SIZE; i++) {
        for (j = 0; j < MAX_SIZE; j++) {
            array[i][j] = (int)num++;
        }
    }

    gettimeofday(&end, NULL);
    printf("end.tv_sec:%ld\n", (long)end.tv_sec);
    printf("end.tv_usec:%ld\n", (long)end.tv_usec);

    /* Elapsed time in microseconds. */
    time_use = (double)(end.tv_sec - start.tv_sec) * 1000000.0
               + (double)(end.tv_usec - start.tv_usec);
    printf("time_use is %f\n", time_use);

    rc = 0;

cleanup:
    for (i = 0; i < MAX_SIZE; i++) {
        free(array[i]);
    }
    free(array);
    return rc;
}
测试结果:
x03430-a@x03430-a:~/test/dpdk$ valgrind --tool=cachegrind ./cache_test3
==16904== Cachegrind, a cache and branch-prediction profiler
==16904== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.
==16904== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==16904== Command: ./cache_test3
==16904==
--16904-- warning: L3 cache found, using its data for the LL simulation.
请输入行数
1024
start.tv_sec:1604299026
start.tv_usec:556480
end.tv_sec:1604299026
end.tv_usec:624710
time_use is 68230.000000
==16904==
==16904== I refs: 19,310,708
==16904== I1 misses: 1,290
==16904== LLi misses: 1,251
==16904== I1 miss rate: 0.01%
==16904== LLi miss rate: 0.01%
==16904==
==16904== D refs: 10,629,314 (8,483,203 rd + 2,146,111 wr)
==16904== D1 misses: 70,468 ( 2,833 rd + 67,635 wr)
==16904== LLd misses: 68,554 ( 2,073 rd + 66,481 wr)
==16904== D1 miss rate: 0.7% ( 0.0% + 3.2% )
==16904== LLd miss rate: 0.6% ( 0.0% + 3.1% )
==16904==
==16904== LL refs: 71,758 ( 4,123 rd + 67,635 wr)
==16904== LL misses: 69,805 ( 3,324 rd + 66,481 wr)
==16904== LL miss rate: 0.2% ( 0.0% + 3.1% )
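对比两次 cachegrind 的统计结果:按列访问的程序 D1 misses 为 1,184,197 次(D1 miss rate 11.1%),而按行访问的程序只有 70,468 次(0.7%),两者的 LLd misses 基本相同(68,554 次)。可以看出,cache miss 对程序的性能影响还是很大的。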