一、简介
1.1 何为64?
将数据编码映射到 [0-9]、[a-z]、[A-Z]、+、/ 这些字符上,
共 10 + 26 + 26 + 2 = 64 个字符
1.2 如何编码
- 因为只有64个字符,映射使用的索引只需要 6bit,即用 [0,63] 进行表示。
- 1byte = 8bit, 3*8 = 4*6, 因此3字节映射成了4字节大小
- 以每6bit为一组,每组高位补两个0,扩展成一个字节
- 编码结果不够4字节时,使用 = 进行填充
编码前(3字节): 11111111 11111111 11111111
编码后(4个下标): 00111111 00111111 00111111 00111111
1.3 如何解码
- 编码后的数据就是映射后的字符流,现在需要反映射回原始数据。
- 字符反映射回下标
- 下标值去掉最高两位,将所有值拼接起来
二、代码实现
2.1 头文件
src/include/common/base64.h
#ifndef BASE64_H
#define BASE64_H
/*
 * Base64 encoding and decoding routines.
 *
 * pg_b64_encode: encode "len" bytes from "src" into "dst", whose capacity is
 * "dstlen" bytes.  Returns the number of characters written, or -1 (with
 * "dst" zero-filled) if the output does not fit.  The output is not
 * NUL-terminated.
 *
 * pg_b64_decode: decode "len" base64 characters from "src" into "dst", whose
 * capacity is "dstlen" bytes.  Returns the number of bytes written, or -1
 * (with "dst" zero-filled) if the input is malformed or the output does not
 * fit.
 *
 * pg_b64_enc_len / pg_b64_dec_len: estimate the output buffer size needed to
 * encode / decode an input of "srclen" bytes.
 */
extern int pg_b64_encode(const char *src, int len, char *dst, int dstlen);
extern int pg_b64_decode(const char *src, int len, char *dst, int dstlen);
extern int pg_b64_enc_len(int srclen);
extern int pg_b64_dec_len(int srclen);
#endif /* BASE64_H */
2.2 评估编解码需要的空间
根据需要多少空间,提前分配好空间
- 避免空间不足导致数据被截断,导致无法编解码
- 避免为了正确编解码而进行多次动态扩容数据存储空间
编码: 3字节 =》4字节
解码: 4字节 =》3字节
/*
 * pg_b64_enc_len
 *
 * Estimate how many bytes the base64 encoding of "srclen" input bytes will
 * occupy, so that the caller can size the destination buffer before calling
 * the encoder.  Every 3 input bytes become 4 output characters; adding 2
 * before scaling accounts for a trailing partial group.
 */
int
pg_b64_enc_len(int srclen)
{
	int			rounded = srclen + 2;

	return rounded * 4 / 3;
}
/*
 * pg_b64_dec_len
 *
 * Estimate how many bytes decoding "srclen" base64 characters will produce,
 * so that the caller can size the destination buffer before calling the
 * decoder.  Every 4 input characters become 3 output bytes.
 */
int
pg_b64_dec_len(int srclen)
{
	int			tripled = srclen * 3;

	return tripled >> 2;
}
2.3 编码
设计妙处1:
- 3byte处理一次
- pos = 2, 每次循环 pos--
- pos小于0时,刚好循环三次,则有三个字节
- 每个字节左移 pos<<3 位后存入buf: 第一次左移 2<<3(16)位, 第二次左移 1<<3(8)位, 第三次左移 0<<3(0)位; buf为uint32
- buf的低三个字节则存储了需要处理的三个字节
- 依次从buf中取出6bit值作为下标值,从_base64映射出编码后的字符
设计妙处2:
- 当pos不等于2,则说明还有字节需要处理,但是又不够三个字节
- 至少有一个字节(8bit),需要两个编码字符(2*6bit)才能表示,因此没有判断,直接取了两次值
- 对于第三个字符,如果pos==0则说明有两个字节,直接取值,否则使用=进行填充;第四个字符固定使用=填充
/*
 * Base64 alphabet: the character at index i encodes the 6-bit value i.
 * Upper-case letters cover 0-25, lower-case letters 26-51, digits 52-61,
 * followed by '+' (62) and '/' (63).
 */
static const char _base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";
/*
* pg_b64_encode
*
* Encode into base64 the given string. Returns the length of the encoded
* string, and -1 in the event of an error with the result buffer zeroed
* for safety.
*/
int
pg_b64_encode(const char *src, int len, char *dst, int dstlen)
{
char *p;
const char *s,
*end = src + len;
int pos = 2;
uint32 buf = 0;
s = src;
p = dst;
while (s < end)
{
buf |= (unsigned char) *s << (pos << 3);
pos--;
s++;
/* write it out */
if (pos < 0)
{
/*
* Leave if there is an overflow in the area allocated for the
* encoded string.
*/
if ((p - dst + 4) > dstlen)
goto error;
*p++ = _base64[(buf >> 18) & 0x3f];
*p++ = _base64[(buf >> 12) & 0x3f];
*p++ = _base64[(buf >> 6) & 0x3f];
*p++ = _base64[buf & 0x3f];
pos = 2;
buf = 0;
}
}
if (pos != 2)
{
/*
* Leave if there is an overflow in the area allocated for the encoded
* string.
*/
if ((p - dst + 4) > dstlen)
goto error;
*p++ = _base64[(buf >> 18) & 0x3f];
*p++ = _base64[(buf >> 12) & 0x3f];
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
*p++ = '=';
}
Assert((p - dst) <= dstlen);
return p - dst;
error:
memset(dst, 0, dstlen);
return -1;
}
2.4 解码
设计妙处1:
- 使用b64lookup映射表直接反映射回原始下标值,而不用计算
/*
 * Reverse lookup table used when decoding.  It is indexed by a 7-bit ASCII
 * code (0..127); each entry holds the 6-bit value of that character in the
 * base64 alphabet, or -1 if the character does not belong to the alphabet.
 *
 * 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and
 * '/' to 63.  The padding character '=' maps to -1 here.
 *
 * Each row below covers 16 consecutive character codes.
 */
static const int8 b64lookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
/*
 * pg_b64_decode
 *
 * Decode "len" base64 characters starting at "src" into "dst", which has
 * room for "dstlen" bytes.
 *
 * Returns the number of bytes written to "dst".  On any failure (whitespace
 * or another character outside the alphabet, misplaced '=' padding, an
 * input that does not end on a 4-character boundary, or a destination that
 * is too small) the whole destination buffer is zeroed and -1 is returned.
 */
int
pg_b64_decode(const char *src, int len, char *dst, int dstlen)
{
	const char *in = src;
	const char *const in_end = src + len;
	char	   *out = dst;
	uint32		group = 0;		/* 6-bit values of the current 4-char group */
	int			nsyms = 0;		/* characters collected in "group" so far */
	int			padded_bytes = 0;	/* 0 until '=' is seen; afterwards the
									 * number of data bytes a padded group
									 * carries (1 or 2) */

	while (in < in_end)
	{
		char		ch = *in++;
		int			val;

		/* Whitespace is not tolerated anywhere in the input. */
		if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
			goto error;

		if (ch != '=')
		{
			/* Translate the character back to its 6-bit value. */
			val = (ch > 0 && ch < 127) ? b64lookup[(unsigned char) ch] : -1;
			if (val < 0)
				goto error;		/* not part of the alphabet */
		}
		else
		{
			/*
			 * The first '=' is only legal as the 3rd or 4th character of a
			 * group; its position tells how many data bytes the group holds.
			 */
			if (padded_bytes == 0)
			{
				if (nsyms == 2)
					padded_bytes = 1;
				else if (nsyms == 3)
					padded_bytes = 2;
				else
					goto error;
			}
			val = 0;
		}

		group = (group << 6) + val;
		nsyms++;

		if (nsyms < 4)
			continue;

		/* A complete group: emit its bytes, most significant first. */
		{
			int			remaining = (padded_bytes == 0) ? 3 : padded_bytes;
			int			shift;

			for (shift = 16; remaining > 0; remaining--, shift -= 8)
			{
				/* Stop before writing past the destination buffer. */
				if ((out - dst + 1) > dstlen)
					goto error;
				*out++ = (group >> shift) & 255;
			}
		}

		group = 0;
		nsyms = 0;
	}

	/* Leftover characters mean the input was truncated or lacks padding. */
	if (nsyms != 0)
		goto error;

	Assert((out - dst) <= dstlen);
	return out - dst;

error:
	/* Do not leave a partial result behind. */
	memset(dst, 0, dstlen);
	return -1;
}