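/*
 * memcpy made H.264 decoding on the TDA4VM use too much CPU, so it was
 * dropped. This file gathers several optimized memcpy implementations found
 * online and compares their running speed; sample results are at the end.
 */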
#include <assert.h>
#include <stdint.h> /* uint32_t, uint64_t, uintptr_t */
#include <stdio.h>
#include <stdlib.h> /* rand, srand */
#include <string.h>
#include <sys/time.h>
#include <time.h> /* time() */
/*
 * Copy num bytes from src to dst, moving one 32-bit word at a time where
 * possible instead of byte by byte.
 *
 * The regions must not overlap. Both pointers must be non-NULL; this is
 * enforced with assert(). Returns dst.
 *
 * Word-sized copying is used only when both pointers are 4-byte aligned;
 * otherwise the whole range is copied bytewise.
 */
void *memcpy_word_length(void *dst, const void *src, size_t num)
{
    assert((dst != NULL) && (src != NULL));

    unsigned char *out = (unsigned char *)dst;
    const unsigned char *in = (const unsigned char *)src;

    if ((uintptr_t)out % sizeof(uint32_t) == 0 &&
        (uintptr_t)in % sizeof(uint32_t) == 0) {
        uint32_t *word_out = (uint32_t *)dst;
        const uint32_t *word_in = (const uint32_t *)src;
        /* Keep the count in size_t so large sizes are not truncated. */
        size_t words = num / sizeof(uint32_t);

        num -= words * sizeof(uint32_t);
        while (words--) {
            *word_out++ = *word_in++;
        }
        out = (unsigned char *)word_out;
        in = (const unsigned char *)word_in;
    }

    /*
     * Leftover bytes: step through byte pointers so each iteration advances
     * by exactly one byte (stepping a word pointer here would skip bytes
     * and run past the end of both buffers).
     */
    while (num--) {
        *out++ = *in++;
    }
    return dst;
}
/*
 * Copy len bytes from src to dst, giving the correct result even when the
 * two regions overlap.
 *
 * When both pointers are 8-byte aligned the bulk of the data is moved in
 * 8-byte units and only the remainder bytewise; otherwise everything is
 * moved bytewise. If dst lies below src the copy runs from low to high
 * addresses, otherwise from high to low, so no source byte is overwritten
 * before it has been read.
 *
 * Returns dst. Nothing is copied when either pointer is NULL or when
 * dst == src.
 */
void *memmove_address_overlap(void *dst, const void *src, size_t len)
{
    unsigned char *out = (unsigned char *)dst;
    const unsigned char *in = (const unsigned char *)src;
    size_t words = 0;
    size_t tail;

    if (!src || !dst || dst == src) {
        return dst;
    }

    if ((uintptr_t)out % sizeof(uint64_t) == 0 &&
        (uintptr_t)in % sizeof(uint64_t) == 0) {
        words = len / sizeof(uint64_t);
    }
    /* Bytes not covered by whole words (all of len when unaligned). */
    tail = len - words * sizeof(uint64_t);

    if ((uintptr_t)out < (uintptr_t)in) {
        /* Copy front to back: words first, then the trailing bytes. */
        if (words > 0) {
            uint64_t *word_out = (uint64_t *)dst;
            const uint64_t *word_in = (const uint64_t *)src;

            for (size_t i = 0; i < words; i++) {
                word_out[i] = word_in[i];
            }
            out += words * sizeof(uint64_t);
            in += words * sizeof(uint64_t);
        }
        while (tail--) {
            *out++ = *in++;
        }
    } else {
        /*
         * Copy back to front: the trailing bytes sit at the highest
         * addresses, so they go first, followed by the words from last to
         * first. Every byte is copied exactly once.
         */
        out += len;
        in += len;
        while (tail--) {
            *--out = *--in;
        }
        if (words > 0) {
            uint64_t *word_out = (uint64_t *)dst;
            const uint64_t *word_in = (const uint64_t *)src;

            while (words--) {
                word_out[words] = word_in[words];
            }
        }
    }
    return dst;
}
/*
 * Fill data[0 .. len-1] with pseudo-random byte values in the range 0-255.
 *
 * rand() is re-seeded from time(NULL) on every call, so calls that see the
 * same time() value produce the same byte sequence. Nothing is written when
 * len <= 0.
 */
static void get_rand_bytes(unsigned char *data, int len)
{
    srand((unsigned)time(NULL)); /* seed from the current time */
    for (int i = 0; i < len; i++) {
        /* Modulo 256 covers every byte value; modulo 255 stopped at 254. */
        data[i] = (unsigned char)(rand() % 256);
    }
}
/*
 * Return the current wall-clock time from gettimeofday() in microseconds,
 * reduced to the low bits that fit in an unsigned int and then converted to
 * int.
 *
 * The full microsecond count does not fit in an int, so the returned value
 * wraps around periodically; it is only meaningful for measuring short
 * intervals between two calls.
 */
static int get_cur_time_us(void)
{
    struct timeval tv;
    unsigned long long us;

    gettimeofday(&tv, NULL);
    /*
     * Do the arithmetic in unsigned 64-bit so seconds * 1,000,000 cannot
     * overflow a signed type, whatever the width of time_t.
     */
    us = (unsigned long long)tv.tv_sec * 1000000ULL
       + (unsigned long long)tv.tv_usec;
    return (int)(unsigned int)us;
}
/*
 * Number of elements in the array n. Only valid for real arrays, not for
 * pointers. The expansion is fully parenthesized so it can be used inside
 * larger expressions.
 */
#define ARRAY_SIZE(n) (sizeof(n) / sizeof((n)[0]))
/*
 * Benchmark driver: for each size in size_list, time memcpy,
 * memcpy_word_length and memmove_address_overlap copying that many bytes
 * from one heap buffer to another, and print the elapsed microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffers cannot be allocated.
 */
int main(void)
{
    const int size_list[] = {
        1024 * 1024 * 10, // 10MB
        1024 * 1024 * 1,  // 1MB
        1024 * 100,       // 100KB
        1024 * 10,        // 10KB
        1024 * 1,         // 1KB
    };
    /* Both buffers are sized for the largest entry (the first one). */
    char *data1 = malloc((size_t)size_list[0]);
    char *data2 = malloc((size_t)size_list[0]);
    int t1;
    int t2;

    if (data1 == NULL || data2 == NULL) {
        fprintf(stderr, "failed to allocate %d-byte benchmark buffers\n",
                size_list[0]);
        free(data1);
        free(data2);
        return EXIT_FAILURE;
    }

    get_rand_bytes((unsigned char *)data1, size_list[0]);
    /*
     * Write to the destination once before timing so the first measured
     * copy is not charged for first-touch page faults.
     */
    memset(data2, 0, (size_t)size_list[0]);

    for (size_t i = 0; i < ARRAY_SIZE(size_list); i++) {
        t1 = get_cur_time_us();
        memcpy(data2, data1, (size_t)size_list[i]);
        t2 = get_cur_time_us();
        printf("copy %d bytes, memcpy waste time %dus\n", size_list[i], t2 - t1);

        t1 = get_cur_time_us();
        memcpy_word_length(data2, data1, (size_t)size_list[i]);
        t2 = get_cur_time_us();
        printf("copy %d bytes, memcpy_word_length waste time %dus\n", size_list[i], t2 - t1);

        t1 = get_cur_time_us();
        memmove_address_overlap(data2, data1, (size_t)size_list[i]);
        t2 = get_cur_time_us();
        printf("copy %d bytes, memmove_address_overlap waste time %dus\n\n", size_list[i], t2 - t1);
    }

    free(data1);
    free(data2);
    return 0;
}
#if 0
copy 10485760 bytes, memcpy waste time 7324us
copy 10485760 bytes, memcpy_word_length waste time 12940us
copy 10485760 bytes, memmove_address_overlap waste time 14450us
copy 1048576 bytes, memcpy waste time 704us
copy 1048576 bytes, memcpy_word_length waste time 1313us
copy 1048576 bytes, memmove_address_overlap waste time 1220us
copy 102400 bytes, memcpy waste time 81us
copy 102400 bytes, memcpy_word_length waste time 96us
copy 102400 bytes, memmove_address_overlap waste time 65us
copy 10240 bytes, memcpy waste time 2us
copy 10240 bytes, memcpy_word_length waste time 14us
copy 10240 bytes, memmove_address_overlap waste time 6us
copy 1024 bytes, memcpy waste time 0us
copy 1024 bytes, memcpy_word_length waste time 1us
copy 1024 bytes, memmove_address_overlap waste time 1us
#endif