CPU cache 测试:使用 valgrind cachegrind 统计 cache miss

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>

/* Side length of the square test matrix; main() overwrites it with the value read from stdin. */
int MAX_SIZE = 10240;

/*
 * Cache-locality demo.
 *
 * Reads the matrix side length from stdin, allocates a MAX_SIZE x MAX_SIZE
 * matrix as one separately malloc'ed buffer per row, and fills it twice:
 * first row by row (array[i][j]), then column by column (array[j][i]).
 * The wall-clock time of each pass is printed in microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(void)
{
	/* double keeps microsecond counts exact far beyond the ~7 digits of float. */
	double time_use = 0;
	struct timeval start;
	struct timeval end;
	int **array = NULL;
	int i, j;
	/* Unsigned so the fill counter wraps instead of overflowing a signed int on huge matrices. */
	unsigned int num = 0;

	printf("请输入行数\n");
	if (scanf("%d", &MAX_SIZE) != 1 || MAX_SIZE <= 0)
	{
		fprintf(stderr, "invalid row count: expected a positive integer\n");
		return EXIT_FAILURE;
	}

	array = malloc((size_t)MAX_SIZE * sizeof *array);
	if (array == NULL)
	{
		fprintf(stderr, "out of memory allocating row table\n");
		return EXIT_FAILURE;
	}
	for (i = 0; i < MAX_SIZE; i++)
	{
		array[i] = malloc((size_t)MAX_SIZE * sizeof *array[i]);
		if (array[i] == NULL)
		{
			fprintf(stderr, "out of memory allocating row %d\n", i);
			/* Release the rows that were already allocated before bailing out. */
			while (i-- > 0)
				free(array[i]);
			free(array);
			return EXIT_FAILURE;
		}
	}

	/* Pass 1: row-major fill, consecutive stores land in the same row buffer. */
	gettimeofday(&start, NULL);
	for (i = 0; i < MAX_SIZE; i++)
	{
		for (j = 0; j < MAX_SIZE; j++)
		{
			array[i][j] = (int)num++;
		}
	}
	gettimeofday(&end, NULL);
	/* Timestamps are printed after the pass so printf is not part of the measurement. */
	printf("start.tv_sec:%ld\n", (long)start.tv_sec);
	printf("start.tv_usec:%ld\n", (long)start.tv_usec);
	printf("end.tv_sec:%ld\n", (long)end.tv_sec);
	printf("end.tv_usec:%ld\n", (long)end.tv_usec);
	/* Elapsed time in microseconds. */
	time_use = (end.tv_sec - start.tv_sec) * 1000000.0 + (end.tv_usec - start.tv_usec);
	printf("time_use is %f\n", time_use);

	/* Pass 2: column-major fill, every store goes to a different row buffer (cache-miss prone). */
	num = 0;
	gettimeofday(&start, NULL);
	for (i = 0; i < MAX_SIZE; i++)
	{
		for (j = 0; j < MAX_SIZE; j++)
		{
			array[j][i] = (int)num++;
		}
	}
	gettimeofday(&end, NULL);
	printf("start.tv_sec:%ld\n", (long)start.tv_sec);
	printf("start.tv_usec:%ld\n", (long)start.tv_usec);
	printf("end.tv_sec:%ld\n", (long)end.tv_sec);
	printf("end.tv_usec:%ld\n", (long)end.tv_usec);
	/* Elapsed time in microseconds. */
	time_use = (end.tv_sec - start.tv_sec) * 1000000.0 + (end.tv_usec - start.tv_usec);
	printf("time_use is %f\n", time_use);

	for (i = 0; i < MAX_SIZE; i++)
		free(array[i]);
	free(array);

	return 0;
}

测试结果:

 

两次循环的耗时相差约一倍:第二个循环按列访问(array[j][i]),访问的内存不连续,容易产生 cache miss,数据需要重新从内存加载,因此性能较差。

 

valgrind 统计性能数据

 

cache miss 程序

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>

/* Side length of the square test matrix; main() overwrites it with the value read from stdin. */
int MAX_SIZE = 10240;

/*
 * Cache-miss variant of the demo.
 *
 * Reads the matrix side length from stdin, allocates a MAX_SIZE x MAX_SIZE
 * matrix as one separately malloc'ed buffer per row, and fills it column by
 * column (array[j][i]), so every store goes to a different row buffer.
 * The wall-clock time of the pass is printed in microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(void)
{
	/* double keeps microsecond counts exact far beyond the ~7 digits of float. */
	double time_use = 0;
	struct timeval start;
	struct timeval end;
	int **array = NULL;
	int i, j;
	/* Unsigned so the fill counter wraps instead of overflowing a signed int on huge matrices. */
	unsigned int num = 0;

	printf("请输入行数\n");
	if (scanf("%d", &MAX_SIZE) != 1 || MAX_SIZE <= 0)
	{
		fprintf(stderr, "invalid row count: expected a positive integer\n");
		return EXIT_FAILURE;
	}

	array = malloc((size_t)MAX_SIZE * sizeof *array);
	if (array == NULL)
	{
		fprintf(stderr, "out of memory allocating row table\n");
		return EXIT_FAILURE;
	}
	for (i = 0; i < MAX_SIZE; i++)
	{
		array[i] = malloc((size_t)MAX_SIZE * sizeof *array[i]);
		if (array[i] == NULL)
		{
			fprintf(stderr, "out of memory allocating row %d\n", i);
			/* Release the rows that were already allocated before bailing out. */
			while (i-- > 0)
				free(array[i]);
			free(array);
			return EXIT_FAILURE;
		}
	}

	/* Column-major fill: the inner loop walks down a column, one row buffer per store. */
	gettimeofday(&start, NULL);
	for (i = 0; i < MAX_SIZE; i++)
	{
		for (j = 0; j < MAX_SIZE; j++)
		{
			array[j][i] = (int)num++;
		}
	}
	gettimeofday(&end, NULL);
	/* Timestamps are printed after the pass so printf is not part of the measurement. */
	printf("start.tv_sec:%ld\n", (long)start.tv_sec);
	printf("start.tv_usec:%ld\n", (long)start.tv_usec);
	printf("end.tv_sec:%ld\n", (long)end.tv_sec);
	printf("end.tv_usec:%ld\n", (long)end.tv_usec);
	/* Elapsed time in microseconds. */
	time_use = (end.tv_sec - start.tv_sec) * 1000000.0 + (end.tv_usec - start.tv_usec);
	printf("time_use is %f\n", time_use);

	for (i = 0; i < MAX_SIZE; i++)
		free(array[i]);
	free(array);

	return 0;
}

测试结果 

x03430-a@x03430-a:~/test/dpdk$ valgrind --tool=cachegrind  ./cache_test2
==16617== Cachegrind, a cache and branch-prediction profiler
==16617== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.
==16617== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==16617== Command: ./cache_test2
==16617== 
--16617-- warning: L3 cache found, using its data for the LL simulation.
请输入行数
1024
start.tv_sec:1604299021
start.tv_usec:132371
end.tv_sec:1604299021
end.tv_usec:214520
time_use is 82149.000000
==16617== 
==16617== I   refs:      19,310,723
==16617== I1  misses:         1,291
==16617== LLi misses:         1,252
==16617== I1  miss rate:       0.01%
==16617== LLi miss rate:       0.01%
==16617== 
==16617== D   refs:      10,629,320  (8,483,206 rd   + 2,146,114 wr)
==16617== D1  misses:     1,184,197  (  133,778 rd   + 1,050,419 wr)
==16617== LLd misses:        68,554  (    2,073 rd   +    66,481 wr)
==16617== D1  miss rate:       11.1% (      1.6%     +      48.9%  )
==16617== LLd miss rate:        0.6% (      0.0%     +       3.1%  )
==16617== 
==16617== LL refs:        1,185,488  (  135,069 rd   + 1,050,419 wr)
==16617== LL misses:         69,806  (    3,325 rd   +    66,481 wr)
==16617== LL miss rate:         0.2% (      0.0%     +       3.1%  )

 

正常代码:

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>

/* Side length of the square test matrix; main() overwrites it with the value read from stdin. */
int MAX_SIZE = 10240;

/*
 * Baseline (cache-friendly) variant of the demo.
 *
 * Reads the matrix side length from stdin, allocates a MAX_SIZE x MAX_SIZE
 * matrix as one separately malloc'ed buffer per row, and fills it row by row
 * (array[i][j]), so consecutive stores land in the same row buffer.
 * The wall-clock time of the pass is printed in microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(void)
{
	/* double keeps microsecond counts exact far beyond the ~7 digits of float. */
	double time_use = 0;
	struct timeval start;
	struct timeval end;
	int **array = NULL;
	int i, j;
	/* Unsigned so the fill counter wraps instead of overflowing a signed int on huge matrices. */
	unsigned int num = 0;

	printf("请输入行数\n");
	if (scanf("%d", &MAX_SIZE) != 1 || MAX_SIZE <= 0)
	{
		fprintf(stderr, "invalid row count: expected a positive integer\n");
		return EXIT_FAILURE;
	}

	array = malloc((size_t)MAX_SIZE * sizeof *array);
	if (array == NULL)
	{
		fprintf(stderr, "out of memory allocating row table\n");
		return EXIT_FAILURE;
	}
	for (i = 0; i < MAX_SIZE; i++)
	{
		array[i] = malloc((size_t)MAX_SIZE * sizeof *array[i]);
		if (array[i] == NULL)
		{
			fprintf(stderr, "out of memory allocating row %d\n", i);
			/* Release the rows that were already allocated before bailing out. */
			while (i-- > 0)
				free(array[i]);
			free(array);
			return EXIT_FAILURE;
		}
	}

	/* Row-major fill: the inner loop walks along one row buffer. */
	gettimeofday(&start, NULL);
	for (i = 0; i < MAX_SIZE; i++)
	{
		for (j = 0; j < MAX_SIZE; j++)
		{
			array[i][j] = (int)num++;
		}
	}
	gettimeofday(&end, NULL);
	/* Timestamps are printed after the pass so printf is not part of the measurement. */
	printf("start.tv_sec:%ld\n", (long)start.tv_sec);
	printf("start.tv_usec:%ld\n", (long)start.tv_usec);
	printf("end.tv_sec:%ld\n", (long)end.tv_sec);
	printf("end.tv_usec:%ld\n", (long)end.tv_usec);
	/* Elapsed time in microseconds. */
	time_use = (end.tv_sec - start.tv_sec) * 1000000.0 + (end.tv_usec - start.tv_usec);
	printf("time_use is %f\n", time_use);

	for (i = 0; i < MAX_SIZE; i++)
		free(array[i]);
	free(array);

	return 0;
}

 

测试结果:

 

x03430-a@x03430-a:~/test/dpdk$ valgrind --tool=cachegrind  ./cache_test3
==16904== Cachegrind, a cache and branch-prediction profiler
==16904== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.
==16904== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==16904== Command: ./cache_test3
==16904== 
--16904-- warning: L3 cache found, using its data for the LL simulation.
请输入行数
1024
start.tv_sec:1604299026
start.tv_usec:556480
end.tv_sec:1604299026
end.tv_usec:624710
time_use is 68230.000000
==16904== 
==16904== I   refs:      19,310,708
==16904== I1  misses:         1,290
==16904== LLi misses:         1,251
==16904== I1  miss rate:       0.01%
==16904== LLi miss rate:       0.01%
==16904== 
==16904== D   refs:      10,629,314  (8,483,203 rd   + 2,146,111 wr)
==16904== D1  misses:        70,468  (    2,833 rd   +    67,635 wr)
==16904== LLd misses:        68,554  (    2,073 rd   +    66,481 wr)
==16904== D1  miss rate:        0.7% (      0.0%     +       3.2%  )
==16904== LLd miss rate:        0.6% (      0.0%     +       3.1%  )
==16904== 
==16904== LL refs:           71,758  (    4,123 rd   +    67,635 wr)
==16904== LL misses:         69,805  (    3,324 rd   +    66,481 wr)
==16904== LL miss rate:         0.2% (      0.0%     +       3.1%  )

对比两次 cachegrind 统计可以看出:按列访问的程序 D1 misses 为 1,184,197 次(D1 miss rate 11.1%),而按行访问的程序只有 70,468 次(0.7%),可见 cache miss 对程序性能的影响还是很大的。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值