内存拷贝函数对比测试

内存拷贝函数

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <xmmintrin.h> // SSE Intrinsics

// Buffer size in bytes for the 1 KiB copy benchmark.
#define SIZE_1K 1024
// 1 MiB in bytes; not referenced by any of the code visible in this file.
#define SIZE_1M (1024 * 1024)

/*
 * Allocate `size` bytes on a 16-byte boundary via _mm_malloc.
 *
 * Returns the new block, or NULL after reporting the failure with perror().
 * NOTE: memory obtained from _mm_malloc is expected to be released with
 * _mm_free rather than free().
 */
void* aligned_malloc(size_t size) {
    void* block = _mm_malloc(size, 16);
    if (block != NULL) {
        return block;
    }

    perror("aligned_malloc failed");
    return NULL;
}

/*
 * Copy `size` bytes from `src` to `dest` in 16-byte chunks using the
 * unaligned 128-bit load/store intrinsics, so neither pointer has to be
 * 16-byte aligned.
 *
 * Any trailing bytes that do not fill a whole 128-bit chunk are copied with
 * memcpy. No special handling is done for overlapping regions.
 *
 * Returns `dest`.
 */
void* simd_memcpy(void* dest, const void* src, size_t size) {
    unsigned char* out = (unsigned char*)dest;
    const unsigned char* in = (const unsigned char*)src;
    const size_t chunk = sizeof(__m128i);
    size_t offset = 0;

    // Move one full 128-bit chunk per pass while at least one chunk remains.
    while (size - offset >= chunk) {
        __m128i block = _mm_loadu_si128((const __m128i*)(in + offset));
        _mm_storeu_si128((__m128i*)(out + offset), block);
        offset += chunk;
    }

    // Whatever is left is shorter than one chunk; hand it to memcpy.
    if (offset < size) {
        memcpy(out + offset, in + offset, size - offset);
    }

    return dest;
}

/*
 * Time `iterations` consecutive calls of `func`, each copying `size` bytes
 * between two heap buffers, and print one result line to stdout.
 *
 * func       copy routine with the memcpy signature; must not be NULL.
 * size       number of bytes copied per call; must be non-zero.
 * iterations number of calls timed; a value <= 0 times an empty loop.
 * funcName   label printed in the result line; NULL is shown as "(unnamed)".
 *
 * Invalid arguments are reported on stderr and the function returns without
 * measuring. Allocation failure terminates the process with EXIT_FAILURE.
 * If the clock cannot be read, the error is reported and no result line is
 * printed. Both buffers are released before returning.
 */
void test_memcpy(void* (*func)(void* , const void* , size_t), size_t size, int iterations, const char* funcName) {
    // size is unsigned, so "non-positive" can only mean zero.
    if (!func || size == 0) {
        fprintf(stderr, "Invalid arguments: function is NULL or size is non-positive.\n");
        return;
    }

    // Passing NULL to %s is undefined, so substitute a placeholder label.
    const char* label = funcName ? funcName : "(unnamed)";

    char* src = (char*)malloc(size);
    char* dst = (char*)malloc(size);
    if (!src || !dst) {
        perror("malloc failed");
        free(src);
        free(dst);
        exit(EXIT_FAILURE);
    }

    // Give both buffers defined contents before any copy reads or compares them.
    memset(src, 0xA5, size);
    memset(dst, 0, size);

    struct timespec start, end;
    if (clock_gettime(CLOCK_MONOTONIC, &start) == -1) {
        // Without a valid start time the measurement would be meaningless.
        perror("clock_gettime failed");
        free(src);
        free(dst);
        return;
    }

    for (int i = 0; i < iterations; ++i) {
        func(dst, src, size); // Execute copy
    }

    if (clock_gettime(CLOCK_MONOTONIC, &end) == -1) {
        perror("clock_gettime failed");
        free(src);
        free(dst);
        return;
    }

    double elapsed = (double)(end.tv_sec - start.tv_sec)
                   + (double)(end.tv_nsec - start.tv_nsec) / 1e9;
    printf("Function: %s, Size: %zu bytes, Iterations: %d, Time: %.6f s\n", label, size, iterations, elapsed);

    // Check the result outside the timed region so a broken copy routine is
    // not silently benchmarked; this also means the copied data is observed.
    if (iterations > 0 && memcmp(dst, src, size) != 0) {
        fprintf(stderr, "%s produced an incorrect copy\n", label);
    }

    free(src);
    free(dst);
}

/*
 * Run the 1 KiB copy benchmark once for each routine under comparison:
 * the library memcpy, the compiler's __builtin_memcpy, and the SSE-based
 * simd_memcpy. Every routine is timed over the same number of iterations.
 */
int main() {
    // Each entry pairs a copy routine with the label printed in its result line.
    const struct {
        void* (*copy)(void*, const void*, size_t);
        const char* label;
    } candidates[] = {
        { memcpy,           "memcpy" },
        { __builtin_memcpy, "__builtin_memcpy" },
        { simd_memcpy,      "simd_memcpy" },
    };
    const int rounds = 2048*2048*3;

    for (size_t k = 0; k < sizeof candidates / sizeof candidates[0]; ++k) {
        test_memcpy(candidates[k].copy, SIZE_1K, rounds, candidates[k].label);
    }

    return 0;
}
  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值