__declspec(align(32)) char buff[17] = "aaaaaaaaaaaaaaaa";
__declspec(align(32)) char key[32] = "ab";
int buflen = strlen(buff);
int keylen = strlen(key);
int step = 16-keylen;
int c;
char *p = NULL;
for(int i = 0; i < buflen; i+=step)
{
c = _mm_cmpestri(*(__m128i*)key, keylen, *(__m128i*)&(buff[i]), buflen-i, 12);
if (c <= step)
{
p = buff + i + c;
break;
}
}
被编译成:
movdqa xmm0,xmmword ptr [ebp+eax-40h]
mov eax,dword ptr [ebp-98h]
movdqa xmm1,xmmword ptr [ebp-80h]
pcmpestri xmm1,xmm0,0Ch
用的都是需要对齐的指令,循环第一轮的时候没问题;第二轮的时候,i+=14,这时候地址就不是16字节对齐了,movdqa就挂了。所以还是新建个asm文件,直接使用pcmpestri指令把,这样的话,加载就可以用movdqu了。