// Allocate and release memory aligned to an arbitrary power-of-two boundary.
//
// AllignedMalloc returns a pointer to `size` usable bytes whose address is a
// multiple of `aligned`.
//
// Layout of the underlying malloc block:
//   [ padding | saved malloc pointer (void *) | size usable bytes ... ]
// The pointer returned by malloc is stored in the pointer-sized slot directly
// below the returned address so that it can be recovered when freeing.
//
// Parameters:
//   size    - number of usable bytes requested
//   aligned - required alignment in bytes; must be a positive power of two
//
// Returns NULL when `aligned` is not a positive power of two, when the total
// request would overflow size_t, or when malloc fails. The result must be
// released with AlignedFree, never with free() directly.
void *AllignedMalloc(size_t size, int aligned)
{
    size_t align;
    size_t overhead;
    void *data;
    void **temp;
    void **alignedData;

    // Reject zero, negative and non-power-of-two alignments at run time so
    // that release (NDEBUG) builds are protected as well.
    if (aligned <= 0 || (aligned & (aligned - 1)) != 0)
        return NULL;

    align = (size_t)aligned;
    overhead = sizeof(void *) + align;

    // Guard against wrap-around of overhead + size.
    if (size > (size_t)-1 - overhead)
        return NULL;

    data = malloc(overhead + size);
    if (data == NULL)
        return NULL;

    // Skip one pointer slot first so there is always room for the saved
    // pointer, then round up to the requested boundary.
    temp = (void **)data + 1;
    alignedData = (void **)(((size_t)temp + align - 1) & ~(align - 1));

    // Remember the address malloc returned, directly below the aligned block.
    alignedData[-1] = data;
    return alignedData; // implicitly converted to void *
}
// Release a block that was returned by AllignedMalloc.
//
// The pointer-sized slot directly below `data` holds the address that malloc
// originally returned; that address is what gets passed to free().
// A NULL `data` is ignored. The NULL check is required here (unlike a plain
// free(NULL)) because the slot below `data` is read before free is called.
void AlignedFree(void *data)
{
    if (data == NULL)
        return;

    free(((void **)data)[-1]);
}
这里以32位系统,16字节对齐为例,用示意图表示更加清楚一点。
原始分配内存图
下面考虑两种情况:
这里要注意转为二级指针后:
1. 指向的内存区域是指针变量,存放的是地址,即编译器对内存的解析发生变化
2. 对void **进行自增++,移动的是一个指针变量的大小
在mmx,sse优化的时候经常要求内存按8,16字节对齐。但是默认的编译器一般都是8字节对齐。所以需要在分配内存的时候,能按16或者其他字节对齐。
以下是从xvid工程中找到的任意字节对齐的内存分配函数。
/*****************************************************************************
 * align_malloc
 *
 * Allocates 'size' bytes (usable by the caller) on the heap, starting at an
 * address that is a multiple of 'alignment'.
 *
 * The underlying malloc request is 'size' + 'alignment' bytes, so keep the
 * alignment small when allocating small pieces of memory.
 *
 * The distance from the malloc'ed block to the returned pointer is always in
 * the range 1..alignment and is stored in the single byte just below the
 * returned pointer. Because that distance has to fit in one byte, alignments
 * above 128 are rejected.
 *
 * NB : a block returned by align_malloc _must_ be released with align_free;
 *      the returned address is not the one malloc produced, so it must not
 *      be handed to the libc free.
 *
 * Parameters     : size      - number of usable bytes requested
 *                  alignment - power of two in 1..128; 0 selects 4
 *
 * Returned value : - NULL on error (invalid alignment, size overflow or
 *                    allocation failure)
 *                  - Pointer to the allocated aligned block
 *
 ****************************************************************************/
void * align_malloc(unsigned int size, unsigned int alignment)
{
    unsigned char *raw;
    unsigned char *aligned;
    size_t total;
    size_t offset;

    /* A zero alignment falls back to 4-byte alignment. */
    if (!alignment)
        alignment = 4;

    /* The mask arithmetic needs a power of two, and the offset byte cannot
     * represent a distance larger than 255. */
    if ((alignment & (alignment - 1)) != 0 || alignment > 128)
        return NULL;

    /* Do the sum in size_t and reject wrap-around. */
    total = (size_t)size + alignment;
    if (total < size)
        return NULL;

    raw = malloc(total);
    if (raw == NULL)
        return NULL;

    /* Distance to the next boundary strictly above raw. When raw is already
     * aligned this is a full 'alignment', which guarantees at least one byte
     * below the returned pointer for the offset. Only the low bits of the
     * address are inspected, so the pointer itself is never rebuilt from a
     * (possibly narrower) integer. */
    offset = alignment - ((size_t)raw & (alignment - 1));
    aligned = raw + offset;

    /* Store the offset so align_free can recover the malloc'ed address. */
    aligned[-1] = (unsigned char)offset;

    return aligned;
}
/*****************************************************************************
 * align_free
 *
 * Releases a block whose last byte before 'mem_ptr' holds the distance back
 * to the address that was originally obtained from malloc. A NULL pointer
 * is ignored.
 *
 * Returned value : None.
 *
 ****************************************************************************/
void align_free(void *mem_ptr)
{
    if (mem_ptr != NULL) {
        unsigned char *aligned = (unsigned char *)mem_ptr;

        /* The byte just below the aligned pointer is the offset from the
         * real allocation to the aligned address. */
        unsigned char offset = aligned[-1];

        /* Step back by that offset and free the real allocation. */
        free(aligned - offset);
    }
}
</span>
任意字节内存对齐这道题在笔试中已经遇到两次,然而却还是不会。在找工作中这个题目很能体现一个人的基础水平,注意学习。
/* Round d up to the nearest multiple of a. a must be a power of two.
 * Both arguments are fully parenthesized so that expressions such as
 * ngx_align(n, 1 << 6) expand correctly. Each argument may be evaluated
 * more than once, so avoid arguments with side effects. */
#define ngx_align(d, a) (((d) + ((a) - 1)) & ~((a) - 1))
#include <stdio.h>
/* Loop counter used by main for both tables; kept at file scope. */
int i;

/*
 * Print two tables. Each table lists, for every i from 1 to 256, the pair
 * "i ngx_align(i, boundary)"; the first table uses a boundary of 64 and the
 * second a boundary of 128. A line break is emitted after every i whose
 * remainder modulo 8 is 7.
 */
int main() {
    /* Table for 64-byte alignment. */
    printf("64\n");
    i = 1;
    while (i <= 256) {
        printf("%3d %3d,", i, ngx_align(i, 64));
        if (i % 8 == 7) {
            printf("\n");
        }
        i++;
    }
    printf("\n");

    /* Table for 128-byte alignment. */
    printf("128\n");
    i = 1;
    while (i <= 256) {
        printf("%3d %3d,", i, ngx_align(i, 128));
        if (i % 8 == 7) {
            printf("\n");
        }
        i++;
    }
    printf("\n");

    return 0;
}
//64
// 1 64, 2 64, 3 64, 4 64, 5 64, 6 64, 7 64,
// 8 64, 9 64, 10 64, 11 64, 12 64, 13 64, 14 64, 15 64,
// 16 64, 17 64, 18 64, 19 64, 20 64, 21 64, 22 64, 23 64,
// 24 64, 25 64, 26 64, 27 64, 28 64, 29 64, 30 64, 31 64,
// 32 64, 33 64, 34 64, 35 64, 36 64, 37 64, 38 64, 39 64,
// 40 64, 41 64, 42 64, 43 64, 44 64, 45 64, 46 64, 47 64,
// 48 64, 49 64, 50 64, 51 64, 52 64, 53 64, 54 64, 55 64,
// 56 64, 57 64, 58 64, 59 64, 60 64, 61 64, 62 64, 63 64,
// 64 64, 65 128, 66 128, 67 128, 68 128, 69 128, 70 128, 71 128,
// 72 128, 73 128, 74 128, 75 128, 76 128, 77 128, 78 128, 79 128,
// 80 128, 81 128, 82 128, 83 128, 84 128, 85 128, 86 128, 87 128,
// 88 128, 89 128, 90 128, 91 128, 92 128, 93 128, 94 128, 95 128,
// 96 128, 97 128, 98 128, 99 128,100 128,101 128,102 128,103 128,
//104 128,105 128,106 128,107 128,108 128,109 128,110 128,111 128,
//112 128,113 128,114 128,115 128,116 128,117 128,118 128,119 128,
//120 128,121 128,122 128,123 128,124 128,125 128,126 128,127 128,
//128 128,129 192,130 192,131 192,132 192,133 192,134 192,135 192,
//136 192,137 192,138 192,139 192,140 192,141 192,142 192,143 192,
//144 192,145 192,146 192,147 192,148 192,149 192,150 192,151 192,
//152 192,153 192,154 192,155 192,156 192,157 192,158 192,159 192,
//160 192,161 192,162 192,163 192,164 192,165 192,166 192,167 192,
//168 192,169 192,170 192,171 192,172 192,173 192,174 192,175 192,
//176 192,177 192,178 192,179 192,180 192,181 192,182 192,183 192,
//184 192,185 192,186 192,187 192,188 192,189 192,190 192,191 192,
//192 192,193 256,194 256,195 256,196 256,197 256,198 256,199 256,
//200 256,201 256,202 256,203 256,204 256,205 256,206 256,207 256,
//208 256,209 256,210 256,211 256,212 256,213 256,214 256,215 256,
//216 256,217 256,218 256,219 256,220 256,221 256,222 256,223 256,
//224 256,225 256,226 256,227 256,228 256,229 256,230 256,231 256,
//232 256,233 256,234 256,235 256,236 256,237 256,238 256,239 256,
//240 256,241 256,242 256,243 256,244 256,245 256,246 256,247 256,
//248 256,249 256,250 256,251 256,252 256,253 256,254 256,255 256,
//256 256,
//128
// 1 128, 2 128, 3 128, 4 128, 5 128, 6 128, 7 128,
// 8 128, 9 128, 10 128, 11 128, 12 128, 13 128, 14 128, 15 128,
// 16 128, 17 128, 18 128, 19 128, 20 128, 21 128, 22 128, 23 128,
// 24 128, 25 128, 26 128, 27 128, 28 128, 29 128, 30 128, 31 128,
// 32 128, 33 128, 34 128, 35 128, 36 128, 37 128, 38 128, 39 128,
// 40 128, 41 128, 42 128, 43 128, 44 128, 45 128, 46 128, 47 128,
// 48 128, 49 128, 50 128, 51 128, 52 128, 53 128, 54 128, 55 128,
// 56 128, 57 128, 58 128, 59 128, 60 128, 61 128, 62 128, 63 128,
// 64 128, 65 128, 66 128, 67 128, 68 128, 69 128, 70 128, 71 128,
// 72 128, 73 128, 74 128, 75 128, 76 128, 77 128, 78 128, 79 128,
// 80 128, 81 128, 82 128, 83 128, 84 128, 85 128, 86 128, 87 128,
// 88 128, 89 128, 90 128, 91 128, 92 128, 93 128, 94 128, 95 128,
// 96 128, 97 128, 98 128, 99 128,100 128,101 128,102 128,103 128,
//104 128,105 128,106 128,107 128,108 128,109 128,110 128,111 128,
//112 128,113 128,114 128,115 128,116 128,117 128,118 128,119 128,
//120 128,121 128,122 128,123 128,124 128,125 128,126 128,127 128,
//128 128,129 256,130 256,131 256,132 256,133 256,134 256,135 256,
//136 256,137 256,138 256,139 256,140 256,141 256,142 256,143 256,
//144 256,145 256,146 256,147 256,148 256,149 256,150 256,151 256,
//152 256,153 256,154 256,155 256,156 256,157 256,158 256,159 256,
//160 256,161 256,162 256,163 256,164 256,165 256,166 256,167 256,
//168 256,169 256,170 256,171 256,172 256,173 256,174 256,175 256,
//176 256,177 256,178 256,179 256,180 256,181 256,182 256,183 256,
//184 256,185 256,186 256,187 256,188 256,189 256,190 256,191 256,
//192 256,193 256,194 256,195 256,196 256,197 256,198 256,199 256,
//200 256,201 256,202 256,203 256,204 256,205 256,206 256,207 256,
//208 256,209 256,210 256,211 256,212 256,213 256,214 256,215 256,
//216 256,217 256,218 256,219 256,220 256,221 256,222 256,223 256,
//224 256,225 256,226 256,227 256,228 256,229 256,230 256,231 256,
//232 256,233 256,234 256,235 256,236 256,237 256,238 256,239 256,
//240 256,241 256,242 256,243 256,244 256,245 256,246 256,247 256,
//248 256,249 256,250 256,251 256,252 256,253 256,254 256,255 256,
//256 256,
//
类似于取模,只不过它得到的是向上取整后能被a整除的值,而不是余数。
/* Round d up to the nearest multiple of a. a must be a power of two.
 * Both arguments are fully parenthesized so that expressions such as
 * ngx_align(n, 1 << 6) expand correctly. */
#define ngx_align(d, a) (((d) + ((a) - 1)) & ~((a) - 1))
1)先看~(a-1)的含义
假设a是8 对应二进制是0000 1000
(a-1)对应的就是 0000 0111
~(a-1)对应的是 1111 1000
任何一个数 与~(a-1)按位相与 就是把低三位置0 也就是把该数变成 8的倍数
对应题目中的就是 64、128的倍数 也就形成了对齐
2)对于d + (a-1)的操作,目的就是实现向上取整
eg:70以64对齐,结果是128而不是64
左半部分 d+a-1 保证处理后的值,这个值除以a得到的商(如果d%a=0,商不变,否则商+1)
右半部分&~(a-1)相当于把余数抹去
。
这里,对a有要求,a必须是2的k次幂,也就是a=1,2,4,8,16....
内存的对齐问题,目的是为了加快IO效率。