写入吞吐量大约在17万每秒,当时我们使用的HBase版本为0.90.4。
对于Bytes.compareTo()方法,0.90.4中的实现如下:
/**
 * Lexicographically compares two byte ranges, one byte at a time, treating
 * every byte as an unsigned value (0..255).
 *
 * @param buffer1 left operand
 * @param offset1 index of the first byte to compare in {@code buffer1}
 * @param length1 number of bytes to compare from {@code buffer1}
 * @param buffer2 right operand
 * @param offset2 index of the first byte to compare in {@code buffer2}
 * @param length2 number of bytes to compare from {@code buffer2}
 * @return the difference of the first pair of differing unsigned bytes, or
 *         {@code length1 - length2} when the shorter range is a prefix of
 *         the longer one (0 when both ranges are identical)
 */
public static int compareTo(byte[] buffer1, int offset1, int length1,
    byte[] buffer2, int offset2, int length2) {
  final int limit1 = offset1 + length1;
  final int limit2 = offset2 + length2;
  int pos1 = offset1;
  int pos2 = offset2;
  // Walk both ranges in lock-step until either one is exhausted.
  while (pos1 < limit1 && pos2 < limit2) {
    // Mask with 0xff so bytes >= 0x80 order after bytes < 0x80.
    final int delta = (buffer1[pos1++] & 0xff) - (buffer2[pos2++] & 0xff);
    if (delta != 0) {
      return delta;
    }
  }
  // Common prefix is equal: the shorter range sorts first.
  return length1 - length2;
}
其实就是一个byte一个byte地对数组从前到后进行比较,并返回两个byte串按无符号字节值比较的大小关系。
而在0.92之后,Bytes.compareTo()被Cloudera的Todd改成了下面这个样子,并且速度比以上方法甚至C的memcmp都要快了4倍左右。
主要是使用了sun.misc.Unsafe直接读取JVM内存,把byte数组中连续的8个字节当作一个long读出,从而使每次比较的bit数从8提高到64,性能因此大幅提高。
/**
* Lexicographically compare two byte ranges, treating each byte as an
* unsigned value. The common prefix is compared one 8-byte word at a time
* through {@code theUnsafe.getLong}; the remaining tail bytes are compared
* one by one.
*
* @param buffer1 left operand
* @param buffer2 right operand
* @param offset1 Where to start comparing in the left buffer
* @param offset2 Where to start comparing in the right buffer
* @param length1 How much to compare from the left buffer
* @param length2 How much to compare from the right buffer
* @return 0 if equal, < 0 if left is less than right, > 0 if left is
*         greater than right. When the shorter range is a prefix of the
*         longer one the result is {@code length1 - length2}.
*/
@Override
public int compareTo(byte[] buffer1, int offset1, int length1,
byte[] buffer2, int offset2, int length2) {
// Short circuit equal case: same array object, same offset, same length.
if (buffer1 == buffer2 &&
offset1 == offset2 &&
length1 == length2) {
return 0;
}
// Only the common prefix needs a byte-level comparison.
int minLength = Math.min(length1, length2);
// Number of whole SIZEOF_LONG-byte words in the common prefix.
int minWords = minLength / SIZEOF_LONG;
// Offsets as passed to theUnsafe.getLong below.
// NOTE(review): BYTE_ARRAY_BASE_OFFSET is presumably the byte[] header size
// (Unsafe.arrayBaseOffset(byte[].class)) -- it is defined outside this block.
int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
/*
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
* time is no slower than comparing 4 bytes at a time even on 32-bit.
* On the other hand, it is substantially faster on 64-bit.
*/
for (int i = 0; i < minWords * SIZEOF_LONG; i += SIZEOF_LONG) {
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
// XOR is non-zero exactly when the two words differ in at least one bit.
long diff = lw ^ rw;
if (diff != 0) {
if (!littleEndian) {
return lessThanUnsigned(lw, rw) ? -1 : 1;
}
/* In the normal (non-little-endian) case, adding Long.MIN_VALUE removes the
influence of the sign bit, so the two words can be compared as plain longs.
For reference, the helper used above is:
static boolean lessThanUnsigned(long x1, long x2) {
return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
}
*/
/*
On little-endian, scan from right to left (lowest-order bits first) to find
the first 8 bits that differ, and compare just those.
*/
// Use binary search to find n, the bit offset (a multiple of 8) of the
// lowest-order byte in which lw and rw differ.
int n = 0;
int y;
int x = (int) diff;
if (x == 0) {
// Low 32 bits are identical; the difference is in the high half.
x = (int) (diff >>> 32);
n = 32;
}
y = x << 16;
if (y == 0) {
// Low 16 bits of x are identical; look in the upper 16.
n += 16;
} else {
x = y;
}
y = x << 8;
if (y == 0) {
// Lower byte of the remaining 16-bit half is identical.
n += 8;
}
// Difference of the two differing bytes, each taken as unsigned 0..255.
return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
}
}
// The epilogue to cover the last (minLength % 8) elements.
for (int i = minWords * SIZEOF_LONG; i < minLength; i++) {
int a = (buffer1[offset1 + i] & 0xff);
int b = (buffer2[offset2 + i] & 0xff);
if (a != b) {
return a - b;
}
}
// Common prefix is equal: the shorter range sorts first.
return length1 - length2;
}
下面是一些简单的测试。Bytes.compare New就是新方法所花费的时间。
Byte Array size : 64B, Unit of Time: ms | ||||||
Compare times | C++ | JAVA | ||||
memcmp | Bytes.compare Old | Array.equals | JNI native memcmp | byte by byte | Bytes.compare New | |
100000 | 10 | 19 | 1 | 26 | 5 | 6 |
1000000 | 120 | 94 | 10 | 250 | 52 | 22 |
10000000 | 970 | 779 | 517 | 2497 | 521 | 223 |
100000000 | 7110 | 5631 | 5168 | 24964 | 5205 | 2229 |
Byte Array size : 20KB, Unit of Time: ms | ||||||
Compare times | C++ | JAVA | ||||
memcmp | Bytes.compare Old | Array.equals | JNI native memcmp | byte by byte | Bytes.compare New | |
1 | 0 | 2 | 1 | 0 | 1 | 149 |
10 | 0 | 7 | 5 | 0 | 7 | 4 |
100 | 0 | 6 | 5 | 3 | 12 | 9 |
1000 | 30 | 32 | 26 | 20 | 16 | 4 |
10000 | 270 | 328 | 263 | 201 | 160 | 38 |
100000 | 1900 | 1873 | 1586 | 2052 | 1584 | 365 |
Byte Array size : 200KB, Unit of Time: ms | ||||||
Compare times | C++ | JAVA | ||||
memcmp | Bytes.compare Old | Array.equals | JNI native memcmp | byte by byte | Bytes.compare New | |
1 | 0 | 8 | 4 | 1 | 4 | 124 |
10 | 0 | 5 | 2 | 3 | 10 | 5 |
100 | 30 | 34 | 16 | 28 | 16 | 7 |
1000 | 280 | 339 | 160 | 285 | 159 | 38 |
10000 | 1850 | 2033 | 1592 | 2833 | 1590 | 373 |
100000 | 16760 | 16278 | 15929 | 28313 | 15923 | 3662 |
Byte Array size : 2000KB, Unit of Time: ms | ||||||
Compare times | C++ | JAVA | ||||
memcmp | Bytes.compare Old | Array.equals | JNI native memcmp | byte by byte | Bytes.compare New | |
1 | 0 | 11 | 6 | 4 | 7 | 126 |
10 | 30 | 34 | 16 | 35 | 29 | 5 |
100 | 290 | 322 | 160 | 350 | 160 | 40 |
1000 | 1890 | 2136 | 1597 | 3489 | 1594 | 379 |
10000 | 16820 | 19952 | 15983 | 34939 | 15969 | 3779 |
100000 | 168160 | 162588 | 159750 | 349309 | 159665 | 37761 |
/**
 * Byte-by-byte lexicographic comparison of two array ranges; bytes are
 * compared as unsigned values.
 *
 * @param buffer1 left operand
 * @param offset1 start index within {@code buffer1}
 * @param length1 number of bytes of {@code buffer1} taking part
 * @param buffer2 right operand
 * @param offset2 start index within {@code buffer2}
 * @param length2 number of bytes of {@code buffer2} taking part
 * @return negative, zero or positive as the left range is less than, equal
 *         to or greater than the right range; when one range is a prefix of
 *         the other the result is {@code length1 - length2}
 */
public static int compareTo(byte[] buffer1, int offset1, int length1,
    byte[] buffer2, int offset2, int length2) {
  int stop1 = offset1 + length1;
  int stop2 = offset2 + length2;
  int left = offset1;
  int right = offset2;
  for (;;) {
    // Either range exhausted without a mismatch: decide on length alone.
    if (left >= stop1 || right >= stop2) {
      return length1 - length2;
    }
    int leftByte = buffer1[left] & 0xff;
    int rightByte = buffer2[right] & 0xff;
    if (leftByte != rightByte) {
      return leftByte - rightByte;
    }
    left++;
    right++;
  }
}
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <iostream>
using namespace std;

// Micro-benchmark for memcmp: for i = 1, 10, 100, ... (while i < times) it
// compares two identical zero-filled buffers of `size` bytes i times and
// prints the elapsed wall-clock seconds and the elapsed clock() ticks.
int main()
{
    const int size = 64;            // bytes per buffer
    const long times = 1000000000;  // exclusive upper bound on the iteration count

    // The memcmp result is stored here on every iteration so the call has an
    // observable effect; with an unused local the optimizer is free to drop
    // the whole timed loop and report a meaningless 0.
    volatile int sink = 0;

    for (int i = 1; i < times; i *= 10)
    {
        char* a = new char[size];
        char* b = new char[size];
        memset(a, 0, size);
        memset(b, 0, size);

        clock_t clockBegin = clock();
        time_t c_start = time(NULL);
        for (int j = 0; j < i; j++)
        {
            sink = memcmp(a, b, size);
        }
        time_t c_end = time(NULL);
        clock_t clockEnd = clock();

        delete [] a;
        delete [] b;

        // time() has one-second resolution; clock() ticks give the finer figure.
        long dtime = c_end - c_start;
        cout << "times is:" << i << endl;
        cout << "time is:" << dtime << "clock time" << clockEnd - clockBegin << endl;
    }
    return 0;
}