// Memory copy functions: benchmarks memcpy, __builtin_memcpy, and an SSE-based simd_memcpy.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <xmmintrin.h> // SSE Intrinsics
#define SIZE_1K 1024
#define SIZE_1M (1024 * 1024)
/**
 * Allocate a buffer of `size` bytes on a 16-byte boundary using _mm_malloc.
 *
 * @param size  Number of bytes requested.
 * @return      Pointer to the allocated block, or NULL if _mm_malloc failed
 *              (in which case a diagnostic is emitted through perror).
 *
 * Note: per the intrinsics documentation, memory obtained from _mm_malloc
 * is released with _mm_free rather than free.
 */
void* aligned_malloc(size_t size) {
    void *block = _mm_malloc(size, 16);

    if (block == NULL) {
        perror("aligned_malloc failed");
    }

    return block;
}
/**
 * Copy `size` bytes from `src` to `dest` using 128-bit SSE loads and stores.
 *
 * The bulk of the buffer is moved in 16-byte chunks with the unaligned
 * load/store intrinsics, so neither pointer needs any particular alignment.
 * Whatever is left over (fewer than 16 bytes) is handed to memcpy.
 *
 * @param dest  Destination buffer, at least `size` bytes.
 * @param src   Source buffer, at least `size` bytes.
 * @param size  Number of bytes to copy; 0 copies nothing.
 * @return      `dest`, mirroring the memcpy convention.
 *
 * NOTE(review): the buffers are presumably expected not to overlap, as with
 * memcpy -- nothing here handles overlap specially.
 */
void* simd_memcpy(void* dest, const void* src, size_t size) {
    const size_t chunkBytes = sizeof(__m128i);
    const size_t chunkCount = size / chunkBytes;  /* whole 128-bit chunks */
    const size_t tailBytes = size % chunkBytes;   /* leftover after the chunks */

    __m128i *out = (__m128i *)dest;
    const __m128i *in = (const __m128i *)src;

    /* Bulk copy: one unaligned 128-bit load and store per chunk. */
    for (size_t left = chunkCount; left != 0; --left) {
        const __m128i chunk = _mm_loadu_si128(in);
        _mm_storeu_si128(out, chunk);
        ++in;
        ++out;
    }

    /* Tail copy: `in` and `out` now point just past the last full chunk. */
    if (tailBytes != 0) {
        memcpy(out, in, tailBytes);
    }

    return dest;
}
/**
 * Time `iterations` back-to-back calls of a memcpy-compatible function and
 * print the elapsed wall-clock time to stdout.
 *
 * Two heap buffers of `size` bytes are allocated for the duration of the
 * measurement and released before returning. The source buffer is filled
 * with a fixed pattern so the copy never reads indeterminate memory.
 *
 * @param func        Copy routine with the memcpy signature. Must not be NULL.
 * @param size        Bytes copied per call. Must be non-zero.
 * @param iterations  Number of calls to time; a value <= 0 times an empty loop.
 * @param funcName    Label printed in the report; NULL is shown as "(unknown)".
 *
 * Failure handling:
 *  - invalid `func`/`size`: message on stderr, returns without measuring;
 *  - allocation failure: perror, then the process exits with EXIT_FAILURE;
 *  - clock_gettime failure: perror, buffers are freed, and no timing line is
 *    printed (the timestamps would be meaningless).
 */
void test_memcpy(void* (*func)(void* , const void* , size_t), size_t size, int iterations, const char* funcName) {
    /* size is unsigned, so "non-positive" can only mean zero. */
    if (!func || size == 0) {
        fprintf(stderr, "Invalid arguments: function is NULL or size is non-positive.\n");
        return;
    }

    char *src = malloc(size);
    char *dst = malloc(size);
    if (!src || !dst) {
        perror("malloc failed");
        /* The process is terminating; the OS reclaims any partial allocation. */
        exit(EXIT_FAILURE);
    }

    /* Give the source defined contents before it is read by the copy. */
    memset(src, 0xA5, size);

    struct timespec start, end;
    if (clock_gettime(CLOCK_MONOTONIC, &start) == -1) {
        perror("clock_gettime failed");
        goto cleanup;
    }

    for (int i = 0; i < iterations; ++i) {
        func(dst, src, size); // Execute copy
    }

    if (clock_gettime(CLOCK_MONOTONIC, &end) == -1) {
        perror("clock_gettime failed");
        goto cleanup;
    }

    /* Combine whole seconds and the nanosecond remainder into seconds. */
    double elapsed = (double)(end.tv_sec - start.tv_sec)
                   + (double)(end.tv_nsec - start.tv_nsec) / 1e9;
    printf("Function: %s, Size: %zu bytes, Iterations: %d, Time: %.6f s\n",
           funcName ? funcName : "(unknown)", size, iterations, elapsed);

cleanup:
    free(dst);
    free(src);
}
/**
 * Entry point: benchmarks each copy routine on a 1 KiB buffer, using the
 * same iteration count for every routine, and returns 0.
 *
 * Routines are timed in this order: the standard memcpy, GCC's
 * __builtin_memcpy, and the SSE-based simd_memcpy.
 */
int main() {
    const int iterations = 2048*2048*3;

    /* One row per routine under test: the function and its report label. */
    const struct {
        void *(*copy)(void *, const void *, size_t);
        const char *label;
    } cases[] = {
        { memcpy,           "memcpy" },
        { __builtin_memcpy, "__builtin_memcpy" },
        { simd_memcpy,      "simd_memcpy" },
    };

    for (size_t idx = 0; idx < sizeof cases / sizeof cases[0]; ++idx) {
        test_memcpy(cases[idx].copy, SIZE_1K, iterations, cases[idx].label);
    }

    return 0;
}