static __always_inline void *__memcpy(void *to, const void *from, size_t n)
{
int d0, d1, d2;
asm volatile("rep ; movsl\n\t"
"movl %4,%%ecx\n\t"
"andl $3,%%ecx\n\t"
"jz 1f\n\t"
"rep ; movsb\n\t"
"1:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
: "memory");
return to;
}
/*
* This looks ugly, but the compiler can optimize it totally,
* as the count is constant.
*/
static __always_inline void *__constant_memcpy(void *to, const void *from,
size_t n)
{
long esi, edi;
if (!n)
return to;
switch (n) {
case 1:
*(char *)to = *(char *)from;
return to;
case 2:
*(short *)to = *(short *)from;
return to;
case 4:
*(int *)to = *(int *)from;
return to;
case 3:
*(short *)to = *(short *)from;
*((char *)to + 2) = *((char *)from + 2);
return to;
case 5:
*(int *)to = *(int *)from;
*((char *)to + 4) = *((char *)from + 4);
return to;
case 6:
*(int *)to = *(int *)from;
*((short *)to + 2) = *((short *)from + 2);
return to;
case 8:
*(int *)to = *(int *)from;
*((int *)to + 1) = *((int *)from + 1);
return to;
}
esi = (long)from;
edi = (long)to;
if (n >= 5 * 4) {
/* large block: use rep prefix */
int ecx;
asm volatile("rep ; movsl"
: "=&c" (ecx), "=&D" (edi), "=&S" (esi)
: "0" (n / 4), "1" (edi), "2" (esi)
: "memory"
);
} else {
/* small block: don't clobber ecx + smaller code */
if (n >= 4 * 4)
asm volatile("movsl"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
if (n >= 3 * 4)
asm volatile("movsl"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
if (n >= 2 * 4)
asm volatile("movsl"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
if (n >= 1 * 4)
asm volatile("movsl"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
}
switch (n % 4) {
/* tail */
case 0:
return to;
case 1:
asm volatile("movsb"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
return to;
case 2:
asm volatile("movsw"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
return to;
default:
asm volatile("movsw\n\tmovsb"
: "=&D"(edi), "=&S"(esi)
: "0"(edi), "1"(esi)
: "memory");
return to;
}
}
最终调用声明
include/asm-x86/string_32.h
#define memcpy(t, f, n) \
(__builtin_constant_p((n)) \
? __constant_memcpy((t), (f), (n)) \
: __memcpy((t), (f), (n)))