前言
一直以来,不能直接操作内存就是横在javaer心中的一根刺:都是计算机语言,咋就不能访问下内存呢?其实java一直都具备这个能力——unsafe提供了类似c的底层内存操作,包括申请内存、释放内存、CAS等。java并发包大量使用了unsafe类,netty的堆外内存读写(zero copy)也借助于它,IO性能得以大幅提升。
获取unsafe
// Obtain the Unsafe singleton through reflection. As the article notes further
// down, the direct accessor is only usable by classes loaded by the JVM's own
// bootstrap loader, so application code reads the private static "theUnsafe"
// field instead.
try {
    getUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
    getUnsafe.setAccessible(true);
    // The field is static, so no receiver instance is needed.
    unsafe = (Unsafe) getUnsafe.get(null);
} catch (ReflectiveOperationException | RuntimeException e) {
    // Fail fast and keep the cause: merely printing the stack trace would leave
    // `unsafe` null and defer the failure to an unrelated NullPointerException
    // at the first use site.
    throw new IllegalStateException("sun.misc.Unsafe is not available", e);
}
获取java对象的地址
/**
 * Returns the raw reference bits the JVM currently stores for {@code obj}.
 *
 * <p>The object is placed in a one-element {@code Object[]} and the first
 * array slot is read back as an integer rather than as a reference.
 *
 * <p>Related constants that can be queried from Unsafe: the native address
 * size ({@code addressSize}), the page size ({@code pageSize}), the base
 * offset of primitive arrays ({@code Unsafe.ARRAY_INT_BASE_OFFSET},
 * {@code Unsafe.ARRAY_BOOLEAN_BASE_OFFSET}, ...) and the spacing between
 * their elements ({@code Unsafe.ARRAY_INT_INDEX_SCALE},
 * {@code Unsafe.ARRAY_BOOLEAN_INDEX_SCALE}, ...).
 *
 * <p>NOTE(review): the value is a snapshot. A moving garbage collector may
 * relocate the object afterwards, and when the JVM stores references in
 * compressed form the returned bits may still need decoding (base/shift)
 * before they are a usable native address. This method does not decode them;
 * confirm the JVM configuration before dereferencing the result.
 *
 * @param obj the object whose reference bits are wanted
 * @return the content of the reference slot, zero-extended to a {@code long}
 * @throws Error if a reference slot is neither 4 nor 8 bytes wide
 */
public static long getLocation(Object obj) {
    Object[] holder = new Object[]{obj};
    long baseOffset = unsafe.arrayBaseOffset(Object[].class);
    // Width of one reference slot inside an Object[]. This, not addressSize(),
    // decides how many bytes may be read: addressSize() reports the native
    // pointer width, which can be 8 while a slot is only 4 bytes wide, and an
    // 8-byte read would then run past the single element.
    int referenceSize = unsafe.arrayIndexScale(Object[].class);
    switch (referenceSize) {
        case 4:
            // Zero-extend: a plain int-to-long widening would turn values
            // with the top bit set into negative numbers.
            return unsafe.getInt(holder, baseOffset) & 0xFFFFFFFFL;
        case 8:
            return unsafe.getLong(holder, baseOffset);
        default:
            throw new Error("unsupported reference size: " + referenceSize);
    }
}
获取对象对应的详细地址
/**
 * Demo: overwrites every element of an int array through raw addresses
 * instead of array indexing, printing each target address, then prints the
 * array to show the new contents.
 *
 * <p>NOTE(review): this treats the value returned by {@code getLocation} as a
 * directly usable native address of the array object - confirm that holds on
 * the JVM configuration in use.
 */
public static void main(String[] args) {
    int[] numbers = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    // Offset of element 0 from the start of the array object (the author observed 12).
    int baseOffset = unsafe.arrayBaseOffset(int[].class);
    // Size of one array element in bytes (the author observed 4).
    int elementSize = unsafe.arrayIndexScale(int[].class);
    // Location of the array object (e.g. 118767816 in the author's run),
    // advanced to the first element.
    long firstElement = getLocation(numbers) + baseOffset;

    int index = 0;
    while (index < 10) {
        long target = firstElement + (index * elementSize);
        // Modify the value by address rather than by index.
        unsafe.putInt(target, index * 100);
        System.out.println("数组地址:" + getAdressHex(target));
        index++;
    }

    for (int value : numbers) {
        System.out.println(value);
    }
}
最终的输出结果:
堆内堆外读写对比
/**
 * Demo in two parts: first prints the Unsafe field offsets of three public
 * {@code Person} fields, then times one hundred million int writes into a
 * ten-element array, once through {@code unsafe.putInt} and once through
 * ordinary indexed assignment.
 *
 * @throws Exception if one of the {@code Person} fields cannot be looked up
 */
public static void main(String[] args) throws Exception {
    Person person = new Person();
    person.setAge("20");

    Class<Person> type = Person.class;
    Field nameField = type.getField("NAME");
    Field ageField = type.getField("age");
    Field aField = type.getField("a");

    // Each lookup is guarded separately so that one failing offset query
    // only prints its message and does not stop the others.
    try {
        long ageOffset = unsafe.objectFieldOffset(ageField);
        System.out.println("objectFieldOffset age -->" + ageOffset);
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    try {
        long aOffset = unsafe.objectFieldOffset(aField);
        System.out.println("objectFieldOffset a -->" + aOffset);
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    try {
        long nameOffset = unsafe.staticFieldOffset(nameField);
        System.out.println("staticFieldOffset name -->" + nameOffset);
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }

    // Reads four bytes at offset 12 of the Class object itself.
    int rawWord = unsafe.getInt(type, 12);
    System.out.println(rawWord);

    int[] numbers = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    int base = unsafe.arrayBaseOffset(int[].class);
    int scale = unsafe.arrayIndexScale(int[].class);
    System.out.println("b:" + base + ",s:" + scale);
    unsafe.putInt(numbers, (long) base + scale * 9, 1);

    final int writes = 100000000;

    // Timed pass 1: writes through Unsafe with an explicit byte offset.
    long startedAt = System.currentTimeMillis();
    for (int n = 0; n < writes; n++) {
        int slot = n % 10;
        unsafe.putInt(numbers, (long) base + scale * slot, n);
    }
    long unsafeElapsed = System.currentTimeMillis() - startedAt;
    System.out.println("unsafe use time:" + unsafeElapsed);

    // Timed pass 2: the same writes through normal array indexing.
    startedAt = System.currentTimeMillis();
    for (int n = 0; n < writes; n++) {
        int slot = n % 10;
        numbers[slot] = n;
    }
    long indexedElapsed = System.currentTimeMillis() - startedAt;
    System.out.println("normal use time:" + indexedElapsed);
}
输出结果:
可以看出对于操作数组来说,上亿次写入差距很小。
ByteBuffer的IO测试
测这个IO的原因是因为但凡游戏逻辑,大比重的时间占比在于IO,如果抛开IO不谈,直接读写内存,游戏能抗住的压力绝对不是现在这个程度。所以IO在抗压上举足轻重,这个IO涉及到一点是zero copy,下面来看测试结果。
/**
 * Rough timing comparison of five ways to write and read a 32 KiB block of
 * bytes, one byte at a time:
 * <ol>
 *   <li>a plain {@code byte[]} accessed through {@code HeapUtil},</li>
 *   <li>a plain {@code byte[]} accessed through {@code UnsafeUtil},</li>
 *   <li>a heap {@link ByteBuffer},</li>
 *   <li>a direct {@link ByteBuffer},</li>
 *   <li>the memory behind a direct {@link ByteBuffer}, accessed through
 *       {@code UnsafeUtil} by address.</li>
 * </ol>
 * Every variant is run {@value #ROUNDS} times and prints the average
 * nanoseconds per pass over the block for the write and the read phase.
 *
 * <p>{@code HeapUtil} and {@code UnsafeUtil} are defined elsewhere; their
 * behaviour is assumed from their names - confirm against their sources.
 *
 * <p>NOTE(review): this is a hand-rolled benchmark. The timed windows include
 * the buffer allocations and the values read are never consumed, so treat the
 * numbers as indicative only.
 */
public class ByteBufferTest {
    /** Number of full passes over the payload in each timed phase. */
    public static final int REPETITIONS = 1 * 1000 * 1000;

    /** How many times each variant is measured. */
    private static final int ROUNDS = 5;

    /** Size in bytes of the payload and of every buffer under test. */
    private static final int BUFFER_SIZE = 1024 * 32;

    /** Pause after the explicit GC request between write and read phase. */
    private static final long SETTLE_MILLIS = 2000;

    // The pattern is pure ASCII; naming the charset keeps the bytes independent
    // of the platform default encoding.
    private static final byte[] bs =
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-=!@#$%^&*()"
                    .getBytes(java.nio.charset.StandardCharsets.US_ASCII);

    /** Payload: the pattern above repeated until the block is full. */
    private static final byte[] bytes = new byte[BUFFER_SIZE];

    static {
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = bs[i % bs.length];
        }
    }

    /**
     * Runs all five variants in order, {@value #ROUNDS} rounds each.
     *
     * @throws InterruptedException if a pause between phases is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        for (int i = 0; i < ROUNDS; i++) {
            benchHeapArray();
        }
        for (int i = 0; i < ROUNDS; i++) {
            benchUnsafeArray();
        }
        for (int i = 0; i < ROUNDS; i++) {
            benchHeapBuffer();
        }
        for (int i = 0; i < ROUNDS; i++) {
            benchDirectBuffer();
        }
        for (int i = 0; i < ROUNDS; i++) {
            benchUnsafeDirect();
        }
    }

    /** Requests a GC and waits, so the read phase starts from a quiet state. */
    private static void settle() throws InterruptedException {
        System.gc();
        Thread.sleep(SETTLE_MILLIS);
    }

    /** Plain byte[] written and read through HeapUtil. */
    private static void benchHeapArray() throws InterruptedException {
        long startTime = System.nanoTime();
        byte[] memory = new byte[bytes.length];
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                HeapUtil.setByte(memory, k, bytes[k]);
            }
        }
        long spendWrite = System.nanoTime() - startTime;

        settle();

        startTime = System.nanoTime();
        byte[] readTemp = new byte[bytes.length];
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                readTemp[k] = HeapUtil.getByte(memory, k);
            }
        }
        long spendRead = System.nanoTime() - startTime;
        System.out.println("普通数组 write:" + spendWrite / REPETITIONS + " | read :" + spendRead / REPETITIONS);
    }

    /** Plain byte[] written and read through UnsafeUtil. */
    private static void benchUnsafeArray() throws InterruptedException {
        long startTime = System.nanoTime();
        byte[] memory = new byte[bytes.length];
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                UnsafeUtil.putByte(memory, k, bytes[k]);
            }
        }
        long spendWrite = System.nanoTime() - startTime;

        settle();

        startTime = System.nanoTime();
        byte[] readTemp = new byte[bytes.length];
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                readTemp[k] = UnsafeUtil.getByte(memory, k);
            }
        }
        long spendRead = System.nanoTime() - startTime;
        System.out.println("UNSAFE数组 write:" + spendWrite / REPETITIONS + " | read :" + spendRead / REPETITIONS);
    }

    /** Heap ByteBuffer: relative put for writing, absolute get for reading. */
    private static void benchHeapBuffer() throws InterruptedException {
        long startTime = System.nanoTime();
        ByteBuffer byteBuffer = ByteBuffer.allocate(BUFFER_SIZE);
        for (int j = 0; j < REPETITIONS; j++) {
            // Rewind before every pass; relative put() advances the position.
            byteBuffer.clear();
            for (int k = 0; k < bytes.length; k++) {
                byteBuffer.put(bytes[k]);
            }
        }
        long spendWrite = System.nanoTime() - startTime;

        settle();

        startTime = System.nanoTime();
        byte[] readTemp = new byte[bytes.length];
        byteBuffer.flip();
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                readTemp[k] = byteBuffer.get(k);
            }
        }
        long spendRead = System.nanoTime() - startTime;
        System.out.println("普通堆内 write:" + spendWrite / REPETITIONS + " | read:" + spendRead / REPETITIONS);
    }

    /** Direct (off-heap) ByteBuffer accessed through the ByteBuffer API. */
    private static void benchDirectBuffer() throws InterruptedException {
        long startTime = System.nanoTime();
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect(BUFFER_SIZE);
        for (int j = 0; j < REPETITIONS; j++) {
            // Rewind before every pass; relative put() advances the position.
            byteBuffer.clear();
            for (int k = 0; k < bytes.length; k++) {
                byteBuffer.put(bytes[k]);
            }
        }
        long spendWrite = System.nanoTime() - startTime;

        settle();

        startTime = System.nanoTime();
        byte[] readTemp = new byte[bytes.length];
        byteBuffer.flip();
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                readTemp[k] = byteBuffer.get(k);
            }
        }
        long spendRead = System.nanoTime() - startTime;
        System.out.println("普通堆外 write:" + spendWrite / REPETITIONS + " | read:" + spendRead / REPETITIONS);
    }

    /** Direct ByteBuffer memory accessed by address through UnsafeUtil. */
    private static void benchUnsafeDirect() throws InterruptedException {
        long startTime = System.nanoTime();
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect(BUFFER_SIZE);
        long address = UnsafeUtil.getAddress(byteBuffer);
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                UnsafeUtil.putByte(address + k, bytes[k]);
            }
        }
        long spendWrite = System.nanoTime() - startTime;

        settle();

        startTime = System.nanoTime();
        byte[] readTemp = new byte[bytes.length];
        for (int j = 0; j < REPETITIONS; j++) {
            for (int k = 0; k < bytes.length; k++) {
                readTemp[k] = UnsafeUtil.getByte(address + k);
            }
        }
        long spendRead = System.nanoTime() - startTime;
        System.out.println("UNSAFE堆外 write:" + spendWrite / REPETITIONS + " | read :" + spendRead / REPETITIONS);
    }
}
结果:
普通数组 write:2055 | read :2438
UNSAFE数组 write:3101 | read :2751
普通堆内 write:58587 | read:14177
普通堆外 write:36486 | read:17488
UNSAFE堆外 write:13225 | read :17433
可见读没有太大的区别,关键在于写。如果自己手撸就是普通堆外,netty采用的则是unsafe堆外(前提是能获取到unsafe,否则就是普通堆外)。写到堆外内存是为了减少IO传输时从jvm堆拷贝到jvm之外内存的时间(zero copy),而unsafe的实现让访问堆外内存最具效率。
unsafe操作内存的核心在于 偏移量+字节占比,上述过程涵盖了核心方法,下面看下java的核心并发包如何使用.
此处是ConcurrentHashMap<K,V>获取Unsafe的方式,这种方式对第三方jar是禁止的,只能用于由启动类加载器加载的java自身的类。第三方jar获取unsafe的唯一方式就是反射。
这里只看unsafe使用的核心代码
/**
 * The map's put operation: hands the pair to {@code putVal} with
 * {@code onlyIfAbsent} switched off and returns whatever {@code putVal}
 * returns.
 *
 * @param key   key to store under
 * @param value value to store
 * @return the result of {@code putVal}
 */
public V put(K key, V value) {
    final boolean onlyIfAbsent = false;
    return putVal(key, value, onlyIfAbsent);
}
/**
 * Abridged excerpt of the put implementation: only the call that matters for
 * the Unsafe discussion is shown. As written here it bumps the element count
 * by one through {@code addCount} and returns {@code null}.
 *
 * NOTE(review): {@code binCount} is not declared in this excerpt - presumably
 * it is produced by the omitted insertion logic; consult the full source.
 */
final V putVal(K key, V value, boolean onlyIfAbsent) {
// Update the value stored for the key (logic omitted in this excerpt).
addCount(1L, binCount);
return null;
}
// Article note: addCount relies heavily on CAS. The CAS methods are native;
// according to the author they ultimately lock as well, but only at the lowest
// (assembly) level while the values are compared, i.e. a very fine-grained lock.
/**
 * Adds {@code x} to the element count and, when {@code check >= 0}, runs the
 * loop below that may call {@code transfer}.
 *
 * Counting: a CAS on the base counter is tried first; if counter cells already
 * exist or that CAS fails, a CAS on one cell (picked by the thread's probe
 * value) is tried; if that is not possible either, the work is handed to
 * {@code fullAddCount} and the method returns.
 *
 * @param x     amount to add
 * @param check controls the follow-up: on the cell path the method returns
 *              early when {@code check <= 1}; the transfer loop only runs when
 *              {@code check >= 0}
 */
private final void addCount(long x, int check) {
CounterCell[] as; long b, s;
// Fast path: only when no counter cells exist, CAS baseCount from b to b + x.
// On success s holds the new total and the block below is skipped.
if ((as = counterCells) != null ||
!U.compareAndSwapLong(this, BASECOUNT, b = baseCount, s = b + x)) {
CounterCell a; long v; int m;
boolean uncontended = true;
// Cell path: needs a non-empty cell array, a non-null cell at index
// (probe & m), and a successful CAS of that cell's value from v to v + x.
// If any of these fails, fullAddCount takes over.
if (as == null || (m = as.length - 1) < 0 ||
(a = as[ThreadLocalRandom.getProbe() & m]) == null ||
!(uncontended =
U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))) {
fullAddCount(x, uncontended);
return;
}
if (check <= 1)
return;
// The cell update succeeded; recompute the total for the check below.
s = sumCount();
}
if (check >= 0) {
Node<K,V>[] tab, nt; int n, sc;
// Loop while the count has reached sizeCtl, a table exists and its length
// is still below MAXIMUM_CAPACITY.
while (s >= (long)(sc = sizeCtl) && (tab = table) != null &&
(n = tab.length) < MAXIMUM_CAPACITY) {
int rs = resizeStamp(n);
// NOTE(review): a negative sizeCtl presumably marks a resize already in
// progress - confirm against the full class.
if (sc < 0) {
// Stop if the stamp in sc does not match, sc equals one of the two
// limit values, there is no next table, or transferIndex is used up.
if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||
sc == rs + MAX_RESIZERS || (nt = nextTable) == null ||
transferIndex <= 0)
break;
// Otherwise bump sizeCtl by one and join the transfer to nextTable.
if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1))
transfer(tab, nt);
}
// sc >= 0: try to install the stamped value and start the transfer
// (null is passed as the next table).
else if (U.compareAndSwapInt(this, SIZECTL, sc,
(rs << RESIZE_STAMP_SHIFT) + 2))
transfer(tab, null);
// Re-read the total before evaluating the loop condition again.
s = sumCount();
}
}
}
结论:
其实unsafe在日常业务中几乎用不到。首先,偏移量一旦计算错误,会直接导致进程crash,风险高,也违背了java的设计初衷;其次,java官方从来就不推荐使用,每次涉及到更新都会再三强调:我不保证更新啊,我不保证以后还能用啊,以后升级出了问题你自己负责啊。
unsafe的堆外内存不受jvm的直接管理,jvm存的也只是一个地址的值,手动管理如果忘记释放,会导致内存泄漏。
unsafe的使用java本身已经封装的很好,需要的出口IO netty也处理的非常到位。自己去手撸一个是没有必要的,但是深入理解,对于打通点到点的传输补足盲区是有必要的。