MD5初始值
小端模式:数据的低字节在数据流的前面。
大端模式:数据的高字节在数据流的前面。
md5算法规定了初始值,把它分成四组分别叫作A、B、C、D,并把每个分组看作一个整数(小端模式)。初始值如下:从左到右是按照字节递增的顺序。
A = 01234567
B = 89abcdef
C = fedcba98
D = 76543210
但是我们书写整数时习惯把高位字节写在前面,把上面这些按小端模式存放的字节还原成整数的数值,就是:
A = 0x67452301
B = 0xefcdab89
C = 0x98badcfe
D = 0x10325476
可见,每一个数字都占用32位(4字节)。如果用C、C++实现,就需要用无符号整型表示。我在这里用Java实现的,由于java没有无符号整型,所以就用long型表示:
A = 0x67452301L
B = 0xEFCDAB89L
C = 0x98BADCFEL
D = 0x10325476L
数据处理
数据指的是bit数据。可我们经常见到的是byte数据,所以在本例中,以字节为单位处理。
我们首先要做的就是补齐数据。假设原始数据长度为 length 字节,补的字节数为 mend,那么要使得总字节数是64的整数倍,即满足:
$$(length + mend) \bmod 64 = 0$$
此外,在 mend 之中,还要预留出8字节来保存原始数据的长度信息(以bit为单位),并且至少还需要1字节来存放填充的起始标记。因此 mend 不可能为0(最小为9)。
例如
当length = 0时, mend = 64
当length = 1时,mend = 63
当length = 55时,mend = 9
当length = 56时,mend = 72(表面上看 mend = 8 就能凑满64字节,但这8个字节要全部用来保存长度信息,放不下起始的填充位1,所以必须再多补一个分组)
当length = 57时,mend = 71
在mend所代表的字节中,从低字节到高字节依次填充数据,填充方法为:先填一个bit 1,其后全部填bit 0,在最后64bit中按照小端模式填充原始数据的长度信息(bit)
。当length = 54,mend = 10时,填充内容如下:
10000000 00000000 10110000 00000001 00000000 00000000 00000000 00000000 00000000 00000000
|--------------------------||-----------------------------------------长度信息 54x8 = 432-------------------------------------------|
如果用16进制表示的话就是: 8000B001000000000000(432 = 0x1B0,按小端模式写成 B0 01 00 00 00 00 00 00)
数据分组
补好字节并填充好内容后,将新数据按64字节分为若干组。按照自然的顺序每组进行一次核心函数
的运算。
核心函数
在引入核心函数之前,要讲讲四个非线性函数。在核心函数里面会用到它们。它们分别是:
FF(a, b, c, d, group, const1, const2)
= b + (
(a + (b & c | ~b & d) + group + const2) << const1
|
(a + (b & c | ~b & d) + group + const2) >> (32 - const1)
)
GG(a, b, c, d, group, const1, const2)
= b + (
(a + (b & d | c & (~d)) + group + const2) << const1
|
(a + (b & d | c & (~d)) + group + const2) >> (32 - const1)
)
HH(a, b, c, d, group, const1, const2)
= b + (
(a + (b ^ c ^ d) + group + const2) << const1
|
(a + (b ^ c ^ d) + group + const2) >> (32 - const1)
)
II(a, b, c, d, group, const1, const2)
= b + (
(a + (c ^ (b | (~d))) + group + const2) << const1
|
(a + (c ^ (b | (~d))) + group + const2) >> (32 - const1)
)
其中,group指的是小分组。之前不是将数据每64字节分为一组了吗?这里的小分组就是那64个字节再次分组的结果。我们分为16个小分组,每个小分组为4个字节。同样,我们把这4个字节看成是以小端模式存储的整数。所以在变成java 的long型时,注意转换。
const1 和 const2 是两个常数,由MD5算法规定,请见文后的代码
。
核心函数的计算过程如下:
把ABCD复制一份,记作 a、b、c、d。
调用16次FF函数
调用16次GG函数
调用16次HH函数
调用16次II函数
把修改后的abcd分别加在A、B、C、D上。注意: 每一个FF、GG、HH、II函数的返回结果都被赋给a、b、c、d中的一个,具体是哪个由算法决定。详见代码。
调用一次核心函数后,A、B、C、D的值将被修改,计算下一组数据时,使用的是修改后的ABCD。所以我们把A、B、C、D存成全局变量。
翻译成文本
每次调用核心函数,都会把A、B、C、D修改一次。把每一组数据经过核心函数计算之后,我们得到了最终的A、B、C、D。
这时已经完成了所有的计算工作。我们把A、B、C、D按照物理的字节序转换成16进制输出即可。
例如:假设最终得到
A = 0xed41d8cdL 按字节序表示为:cdd841ed
B = 0x98f00b20L 按字节序表示为:200bf098
C = 0x4e980099L 按字节序表示为:9900984e
D = 0x8ecf8427L 按字节序表示为:2784cf8e
则输出结果是:cdd841ed200bf0989900984e2784cf8e
代码
package md5;
public class Md5 {
//一些常量
private static final long part1 = 0x67452301L;
private static final long part2 = 0xEFCDAB89L;
private static final long part3 = 0x98BADCFEL;
private static final long part4 = 0x10325476L;
private static final long[] const1s = {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};
private static final long[] const2s = {
0xd76aa478L, 0xe8c7b756L, 0x242070dbL, 0xc1bdceeeL, 0xf57c0fafL, 0x4787c62aL, 0xa8304613L, 0xfd469501L,
0x698098d8L, 0x8b44f7afL, 0xffff5bb1L, 0x895cd7beL, 0x6b901122L, 0xfd987193L, 0xa679438eL, 0x49b40821L,
0xf61e2562L, 0xc040b340L, 0x265e5a51L, 0xe9b6c7aaL, 0xd62f105dL, 0x02441453L, 0xd8a1e681L, 0xe7d3fbc8L,
0x21e1cde6L, 0xc33707d6L, 0xf4d50d87L, 0x455a14edL, 0xa9e3e905L, 0xfcefa3f8L, 0x676f02d9L, 0x8d2a4c8aL,
0xfffa3942L, 0x8771f681L, 0x6d9d6122L, 0xfde5380cL, 0xa4beea44L, 0x4bdecfa9L, 0xf6bb4b60L, 0xbebfbc70L,
0x289b7ec6L, 0xeaa127faL, 0xd4ef3085L, 0x04881d05L, 0xd9d4d039L, 0xe6db99e5L, 0x1fa27cf8L, 0xc4ac5665L,
0xf4292244L, 0x432aff97L, 0xab9423a7L, 0xfc93a039L, 0x655b59c3L, 0x8f0ccc92L, 0xffeff47dL, 0x85845dd1L,
0x6fa87e4fL, 0xfe2ce6e0L, 0xa3014314L, 0x4e0811a1L, 0xf7537e82L, 0xbd3af235L, 0x2ad7d2bbL, 0xeb86d391L
};
private static final char[] hex = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
//全局变量
private static long A;
private static long B;
private static long C;
private static long D;
//四个非线性函数
private static long FF(long a, long b, long c, long d, long group, long const1, long const2) {
a = (a + (b & c | ~b & d) + group + const2) & 0xFFFFFFFFL;
return b + (a << const1 | a >> (32 - const1));
}
private static long GG(long a, long b, long c, long d, long group, long const1, long const2) {
a = (a + (b & d | c & (~d)) + group + const2) & 0xFFFFFFFFL;
return b + (a << const1 | a >> (32 - const1));
}
private static long HH(long a, long b, long c, long d, long group, long const1, long const2) {
a = (a + (b ^ c ^ d) + group + const2) & 0xFFFFFFFFL;
return b + (a << const1 | a >> (32 - const1));
}
private static long II(long a, long b, long c, long d, long group, long const1, long const2) {
a = (a + (c ^ (b | (~d))) + group + const2) & 0xFFFFFFFFL;
return b + (a << const1 | a >> (32 - const1));
}
//核心函数
private static void calculate(long[] groups) {
long a = A;
long b = B;
long c = C;
long d = D;
//第一轮
a = FF(a, b, c, d, groups[0], const1s[0], const2s[0]);
d = FF(d, a, b, c, groups[1], const1s[1], const2s[1]);
c = FF(c, d, a, b, groups[2], const1s[2], const2s[2]);
b = FF(b, c, d, a, groups[3], const1s[3], const2s[3]);
a = FF(a, b, c, d, groups[4], const1s[0], const2s[4]);
d = FF(d, a, b, c, groups[5], const1s[1], const2s[5]);
c = FF(c, d, a, b, groups[6], const1s[2], const2s[6]);
b = FF(b, c, d, a, groups[7], const1s[3], const2s[7]);
a = FF(a, b, c, d, groups[8], const1s[0], const2s[8]);
d = FF(d, a, b, c, groups[9], const1s[1], const2s[9]);
c = FF(c, d, a, b, groups[10], const1s[2], const2s[10]);
b = FF(b, c, d, a, groups[11], const1s[3], const2s[11]);
a = FF(a, b, c, d, groups[12], const1s[0], const2s[12]);
d = FF(d, a, b, c, groups[13], const1s[1], const2s[13]);
c = FF(c, d, a, b, groups[14], const1s[2], const2s[14]);
b = FF(b, c, d, a, groups[15], const1s[3], const2s[15]);
//第二轮
a = GG(a, b, c, d, groups[1], const1s[4], const2s[16]);
d = GG(d, a, b, c, groups[6], const1s[5], const2s[17]);
c = GG(c, d, a, b, groups[11], const1s[6], const2s[18]);
b = GG(b, c, d, a, groups[0], const1s[7], const2s[19]);
a = GG(a, b, c, d, groups[5], const1s[4], const2s[20]);
d = GG(d, a, b, c, groups[10], const1s[5], const2s[21]);
c = GG(c, d, a, b, groups[15], const1s[6], const2s[22]);
b = GG(b, c, d, a, groups[4], const1s[7], const2s[23]);
a = GG(a, b, c, d, groups[9], const1s[4], const2s[24]);
d = GG(d, a, b, c, groups[14], const1s[5], const2s[25]);
c = GG(c, d, a, b, groups[3], const1s[6], const2s[26]);
b = GG(b, c, d, a, groups[8], const1s[7], const2s[27]);
a = GG(a, b, c, d, groups[13], const1s[4], const2s[28]);
d = GG(d, a, b, c, groups[2], const1s[5], const2s[29]);
c = GG(c, d, a, b, groups[7], const1s[6], const2s[30]);
b = GG(b, c, d, a, groups[12], const1s[7], const2s[31]);
//第三轮
a = HH(a, b, c, d, groups[5], const1s[8], const2s[32]);
d = HH(d, a, b, c, groups[8], const1s[9], const2s[33]);
c = HH(c, d, a, b, groups[11], const1s[10], const2s[34]);
b = HH(b, c, d, a, groups[14], const1s[11], const2s[35]);
a = HH(a, b, c, d, groups[1], const1s[8], const2s[36]);
d = HH(d, a, b, c, groups[4], const1s[9], const2s[37]);
c = HH(c, d, a, b, groups[7], const1s[10], const2s[38]);
b = HH(b, c, d, a, groups[10], const1s[11], const2s[39]);
a = HH(a, b, c, d, groups[13], const1s[8], const2s[40]);
d = HH(d, a, b, c, groups[0], const1s[9], const2s[41]);
c = HH(c, d, a, b, groups[3], const1s[10], const2s[42]);
b = HH(b, c, d, a, groups[6], const1s[11], const2s[43]);
a = HH(a, b, c, d, groups[9], const1s[8], const2s[44]);
d = HH(d, a, b, c, groups[12], const1s[9], const2s[45]);
c = HH(c, d, a, b, groups[15], const1s[10], const2s[46]);
b = HH(b, c, d, a, groups[2], const1s[11], const2s[47]);
//第四轮
a = II(a, b, c, d, groups[0], const1s[12], const2s[48]);
d = II(d, a, b, c, groups[7], const1s[13], const2s[49]);
c = II(c, d, a, b, groups[14], const1s[14], const2s[50]);
b = II(b, c, d, a, groups[5], const1s[15], const2s[51]);
a = II(a, b, c, d, groups[12], const1s[12], const2s[52]);
d = II(d, a, b, c, groups[3], const1s[13], const2s[53]);
c = II(c, d, a, b, groups[10], const1s[14], const2s[54]);
b = II(b, c, d, a, groups[1], const1s[15], const2s[55]);
a = II(a, b, c, d, groups[8], const1s[12], const2s[56]);
d = II(d, a, b, c, groups[15], const1s[13], const2s[57]);
c = II(c, d, a, b, groups[6], const1s[14], const2s[58]);
b = II(b, c, d, a, groups[13], const1s[15], const2s[59]);
a = II(a, b, c, d, groups[4], const1s[12], const2s[60]);
d = II(d, a, b, c, groups[11], const1s[13], const2s[61]);
c = II(c, d, a, b, groups[2], const1s[14], const2s[62]);
b = II(b, c, d, a, groups[9], const1s[15], const2s[63]);
//收尾
A += a;
B += b;
C += c;
D += d;
}
//门面函数
public static String digest(byte[] bytes) {
//初始化ABCD
A = part1;
B = part2;
C = part3;
D = part4;
//足补位数
int size = bytes.length;
int mendCount = 0;
if(size % 64 == 0) {
mendCount = 64;
}else {
int surplus = (size % 64);
if(surplus < 56) {
mendCount = (64 - surplus);
}else {
mendCount = 64 + (64 - surplus);
}
}
//新的数据
byte[] data = new byte[size + mendCount];
for(int i = 0; i < data.length - 8; i++) {
if(i < size) {
data[i] = bytes[i];
}else if(i == size){
data[i] = (byte) (0x80);
}else {
data[i] = 0;
}
}
//写入数据长度信息(小端模式)
for(int i = 0; i < 8; i++) {
int index = data.length - 8 + i;
data[index] = (byte) ((size * 8L) >>> (8 * i)); //TODO: 修改
}
//分组调用核心函数计算
long[] masks = {
0x00000000FF000000L,
0x0000000000FF0000L,
0x000000000000FF00L,
0x00000000000000FFL
};
for(int i = 0; i < data.length / 64; i++) {
long[] groups = new long[16];
for(int j = 0; j < 16; j++) {
long l = 0;
int index = i * 64 + j * 4;
l |= (((long)data[index + 0]) << (0) & masks[3]);
l |= (((long)data[index + 1]) << (8) & masks[2]);
l |= (((long)data[index + 2]) << (16) & masks[1]);
l |= (((long)data[index + 3]) << (24) & masks[0]);
groups[j] = l;
}
calculate(groups);
}
//将结果按照小端模式字节序翻译成字符串
StringBuffer sb = new StringBuffer();
for(int i = 0; i < 4; i++) {
byte b = (byte) (A >> (i * 8));
sb.append(hex[(b & 0xF0) >> 4]);
sb.append(hex[b & 0x0F]);
}
for(int i = 0; i < 4; i++) {
byte b = (byte) (B >> (i * 8));
sb.append(hex[(b & 0xF0) >> 4]);
sb.append(hex[b & 0x0F]);
}
for(int i = 0; i < 4; i++) {
byte b = (byte) (C >> (i * 8));
sb.append(hex[(b & 0xF0) >> 4]);
sb.append(hex[b & 0x0F]);
}
for(int i = 0; i < 4; i++) {
byte b = (byte) (D >> (i * 8));
sb.append(hex[(b & 0xF0) >> 4]);
sb.append(hex[b & 0x0F]);
}
return sb.toString();
}
}