Base64是一种很常见的编码规范,其定义为:Base64内容传送编码被设计用来把任意序列的8位字节描述为一种不易被人直接识别的形式。(The Base64 Content-Transfer-Encoding is designed to represent arbitrary sequences of octets in a form that need not be humanly readable.),其作用是将二进制序列转换为可打印的ASCII字符序列,常用在需要通过文本来传输二进制数据的协议中,如HTTP和SMTP等。本文就简单讲一下其编解码。
Base64编码规则:对于待编码数据,以3个字节为单位,依次取6位,前两位补0形成8位编码,由于3*8=4*6,3个字节的输入会编码成4个字节的输出。如果最后剩下的数据不足3个字节,则不足的位用0填充,完全由填充位构成的输出字符用'='表示,因此编码后输出的文本末尾可能会出现1或2个'='。
为了保证所输出的编码为可读字符,Base64制定了一个编码表,以便进行统一转换。编码表的大小为2^6=64,这也是Base64名称的由来。
Base64编码表
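| 码值 | 码 | 码值 | 码 | 码值 | 码 | 码值 | 码 |
|------|----|------|----|------|----|------|----|
| 0 | A | 16 | Q | 32 | g | 48 | w |
| 1 | B | 17 | R | 33 | h | 49 | x |
| 2 | C | 18 | S | 34 | i | 50 | y |
| 3 | D | 19 | T | 35 | j | 51 | z |
| 4 | E | 20 | U | 36 | k | 52 | 0 |
| 5 | F | 21 | V | 37 | l | 53 | 1 |
| 6 | G | 22 | W | 38 | m | 54 | 2 |
| 7 | H | 23 | X | 39 | n | 55 | 3 |
| 8 | I | 24 | Y | 40 | o | 56 | 4 |
| 9 | J | 25 | Z | 41 | p | 57 | 5 |
| 10 | K | 26 | a | 42 | q | 58 | 6 |
| 11 | L | 27 | b | 43 | r | 59 | 7 |
| 12 | M | 28 | c | 44 | s | 60 | 8 |
| 13 | N | 29 | d | 45 | t | 61 | 9 |
| 14 | O | 30 | e | 46 | u | 62 | + |
| 15 | P | 31 | f | 47 | v | 63 | / |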
(pad) =
编码详解
1. 不加后补位的字符串“abC”:
01100001 01100010 01000011
00 011000 00 010110 00 001001 00 000011
24 22 9 3
查表可以得到编码值为:“YWJD”。
2. 加后补位的字符串“ab”:
01100001 01100010
00 011000 00 010110 00 001000 00 000000
24 22 8 -
由于不够24个比特位,所以我们要补8个值为0的比特位以凑够24位。“-”表示增加的补位,编码后应为“=”,所以可以得到编码后的字符串为“YWI=”。
3. 加后补位的字符串“a”:
01100001
00 011000 00 010000 00 000000 00 000000
24 16 - -
同样,编码后的字符串为“YQ==”,只是这里出现了两个“=”。
算法实现--编码:
/* Base64 alphabet: the array index is the 6-bit value, the element is the
 * character emitted for it. */
static const char table64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Encode the data in inbuf as a NUL-terminated Base64 string.
 *
 * inbuf  - bytes to encode; read-only.
 * inlen  - number of bytes in inbuf. When 0, inbuf is treated as a C string
 *          and its strlen() is used instead.
 * outbuf - receives a malloc()ed buffer holding the encoded text. It is set
 *          to NULL on failure.
 *
 * Returns the length of the encoded text (excluding the terminating NUL), or
 * -1 if an argument is invalid, the result would not fit in an int, or the
 * allocation fails.
 *
 * NOTE: the caller owns *outbuf and must free() it.
 */
int base64Encode(const void *inbuf, int inlen, char **outbuf)
{
    /* Largest value an int can hold, computed without needing <limits.h>. */
    const size_t maxLen = (size_t)(~0u >> 1);
    const unsigned char *in = inbuf;
    size_t remaining;
    size_t groups;
    char *encoded;
    char *out;

    if (NULL == outbuf)
        return -1;
    *outbuf = NULL;

    if (NULL == in || inlen < 0)
        return -1;

    remaining = (0 == inlen) ? strlen((const char *)in) : (size_t)inlen;

    /* Every started group of 3 input bytes yields exactly 4 output chars. */
    groups = remaining / 3 + (remaining % 3 != 0);
    if (groups > maxLen / 4)
        return -1;

    encoded = out = malloc(groups * 4 + 1);
    if (NULL == encoded)
        return -1;

    while (remaining > 0) {
        unsigned char chunk[3] = { 0, 0, 0 };
        int taken = 0;

        /* Pull up to 3 bytes; missing bytes stay zero so the shifts below
         * pad the final group with zero bits. */
        while (taken < 3 && remaining > 0) {
            chunk[taken++] = *in++;
            remaining--;
        }

        out[0] = table64[chunk[0] >> 2];
        out[1] = table64[((chunk[0] & 0x03) << 4) | (chunk[1] >> 4)];
        /* Output positions that carry no input bits are written as '='. */
        out[2] = (taken > 1)
                     ? table64[((chunk[1] & 0x0F) << 2) | (chunk[2] >> 6)]
                     : '=';
        out[3] = (taken > 2) ? table64[chunk[2] & 0x3F] : '=';
        out += 4;
    }

    *out = '\0';
    *outbuf = encoded;
    return (int)(out - encoded);
}
算法实现--解码:
/*
 * Decode one Base64 group (up to 4 characters) from src into 3 bytes at dest.
 *
 * '=' contributes six zero bits. A NUL inside the group ends it: the NUL and
 * every remaining position are treated as '=' so nothing past the terminator
 * is read. Any other character outside the Base64 alphabet is skipped without
 * contributing bits, so the result for such input is not meaningful; callers
 * should pass only alphabet characters and '='.
 *
 * dest must have room for 3 bytes; all 3 are always written.
 */
void decodeBlock(unsigned char *dest, char *src)
{
    unsigned int bits = 0;
    int ended = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = '=';

        if (!ended) {
            c = src[i];
            if (c == '\0') {
                ended = 1;
                c = '=';
            }
        }

        if (c >= 'A' && c <= 'Z')
            bits = (bits << 6) | (unsigned int)(c - 'A');
        else if (c >= 'a' && c <= 'z')
            bits = (bits << 6) | (unsigned int)(c - 'a' + 26);
        else if (c >= '0' && c <= '9')
            bits = (bits << 6) | (unsigned int)(c - '0' + 52);
        else if (c == '+')
            bits = (bits << 6) | 62u;
        else if (c == '/')
            bits = (bits << 6) | 63u;
        else if (c == '=')
            bits <<= 6;
    }

    /* The 24 accumulated bits are the three output bytes, most significant
     * byte first. */
    dest[0] = (unsigned char)((bits >> 16) & 0xFF);
    dest[1] = (unsigned char)((bits >> 8) & 0xFF);
    dest[2] = (unsigned char)(bits & 0xFF);
}

/*
 * Decode the Base64 string src into dest.
 *
 * dest - output buffer; see NOTE for the required size.
 * src  - NUL-terminated Base64 text. Decoding stops at the first '=' or at
 *        the terminator; trailing '=' padding is optional.
 * len  - if non-NULL, receives the number of bytes written to dest.
 *
 * A trailing group of 2 or 3 characters yields 1 or 2 bytes; a single
 * leftover character cannot form a byte and is ignored. Nothing is written
 * and *len is 0 when src is empty or when dest or src is NULL.
 *
 * NOTE: the length of the dest buffer must be larger than
 * (strlen(src)*3)/4+3.
 */
void base64Decode(unsigned char *dest, char *src, int *len)
{
    int dataChars = 0;
    int fullGroups;
    int tailChars;
    int tailBytes;
    int i;

    if (!dest || !src) {
        if (len)
            *len = 0;
        return;
    }

    while (src[dataChars] != '\0' && src[dataChars] != '=')
        dataChars++;

    fullGroups = dataChars / 4;
    tailChars = dataChars % 4;
    /* 2 chars carry 12 bits -> 1 byte, 3 chars carry 18 bits -> 2 bytes. */
    tailBytes = (tailChars > 1) ? tailChars - 1 : 0;

    if (len)
        *len = fullGroups * 3 + tailBytes;

    for (i = 0; i < fullGroups; i++) {
        decodeBlock(dest, src);
        dest += 3;
        src += 4;
    }

    if (tailBytes > 0) {
        /* Decode the partial group through a padded copy so that only the
         * bytes it really encodes are stored in dest. */
        char group[4] = { '=', '=', '=', '=' };
        unsigned char decoded[3];

        for (i = 0; i < tailChars; i++)
            group[i] = src[i];
        decodeBlock(decoded, group);
        for (i = 0; i < tailBytes; i++)
            dest[i] = decoded[i];
    }
}
-----------------------------
仅作学习和交流,部分资料来自互联网,转载请注明作者及出处,谢谢!