- Disruptor是一个高性能队列,其描述、评测、原理都在下面参考文档中。下面我们一步步分析一下源码是怎么实现的
最新源码需要使用jdk11才能运行
生产者消费者
首先我们用 BlockingQueue 实现最简单的生产者消费者。Disruptor 本身实现的也是相同的功能,但性能更高
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Minimal producer/consumer demo built on a bounded {@link BlockingQueue}.
 *
 * <p>{@link #main(String[])} starts three consumer tasks and one producer task on a shared
 * fixed thread pool, lets them run for ten seconds, then clears the {@code run} flag and
 * shuts the pool down.
 */
public class ProducerAndConsumer
{
    /** Bounded hand-off queue shared by the producer and the consumers. */
    private final BlockingQueue<Integer> queue = new ArrayBlockingQueue<>(10000);

    /** Pool that runs the producer and consumer tasks. */
    private final ExecutorService executor = Executors.newFixedThreadPool(10);

    /** Cleared by {@code main} to ask the consumer loops to stop. */
    private volatile boolean run = true;

    /**
     * Submits a task that enqueues the integers 1..100, pausing 1 ms before each one.
     *
     * <p>If the worker thread is interrupted, the interrupt flag is restored and the task
     * stops instead of swallowing the interruption and carrying on.
     */
    private void producer()
    {
        executor.execute(() ->
        {
            for (int i = 1; i <= 100; i++)
            {
                try
                {
                    Thread.sleep(1);
                }
                catch (InterruptedException e)
                {
                    // Preserve the interrupt for the pool and stop producing.
                    Thread.currentThread().interrupt();
                    return;
                }
                // add() throws IllegalStateException when the queue is full; with 100 items
                // and a capacity of 10000 that cannot happen in this demo.
                queue.add(i);
                System.out.println(Thread.currentThread().getName() + "生产者:" + i);
            }
        });
    }

    /**
     * Submits a task that polls the queue until {@code run} becomes false.
     *
     * <p>Each poll waits at most 10 ms so the loop re-checks {@code run} regularly. If the
     * worker thread is interrupted, the interrupt flag is restored and the task stops.
     */
    private void consumer()
    {
        executor.execute(() ->
        {
            while (run)
            {
                try
                {
                    Integer take = queue.poll(10, TimeUnit.MILLISECONDS);
                    if (take == null)
                    {
                        // Timed out with nothing to consume; re-check the run flag.
                        continue;
                    }
                    System.out.println(Thread.currentThread().getName() + "消费者:" + take);
                    Thread.sleep(1);
                }
                catch (InterruptedException e)
                {
                    // Preserve the interrupt for the pool and stop consuming.
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        });
    }

    /**
     * Runs the demo: three consumers, one producer, ten seconds of run time, then shutdown.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while sleeping
     */
    public static void main(String[] args) throws InterruptedException
    {
        ProducerAndConsumer demo = new ProducerAndConsumer();
        demo.consumer();
        demo.consumer();
        demo.consumer();
        demo.producer();
        Thread.sleep(10000);
        // Ask the consumer loops to exit, then let the pool wind down.
        demo.run = false;
        demo.executor.shutdown();
    }
}
缓存行测试
CPU 缓存以缓存行(通常为 64 字节,正好容纳 8 个 long)为单位加载数据:如果 long 数组中的一个值被加载到缓存中,与它相邻的另外 7 个值也会被一并加载。因此你能非常快地遍历这个数组。事实上,你可以非常快速地遍历在连续内存块中分配的任意数据结构。这是 Disruptor 使用的原理之一
/**
 * Times two traversals of the same 1M x 8 {@code long} grid: first row by row, then column
 * by column, printing the elapsed milliseconds of each.
 */
public class CacheLineEffect {
    /** Number of rows in the grid. */
    private static final int ROWS = 1024 * 1024;

    /** Number of longs per row. */
    private static final int COLS = 8;

    /**
     * Builds the grid, then measures the row-major and the column-major traversal.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final long[][] grid = new long[ROWS][];
        int row = 0;
        while (row < ROWS) {
            final long[] line = new long[COLS];
            // Explicitly store zeros, as the original does, so every cell is written once.
            for (int col = 0; col < COLS; col++) {
                line[col] = 0L;
            }
            grid[row] = line;
            row++;
        }

        long total = 0L;

        // Pass 1: walk each row from start to end before moving to the next row.
        long startedAt = System.currentTimeMillis();
        for (final long[] line : grid) {
            for (final long cell : line) {
                total += cell;
            }
        }
        System.out.println("Loop time:" + (System.currentTimeMillis() - startedAt) + "ms");

        // Pass 2: fix a column and jump from row to row.
        startedAt = System.currentTimeMillis();
        for (int col = 0; col < COLS; col++) {
            for (int r = 0; r < ROWS; r++) {
                total += grid[r][col];
            }
        }
        System.out.println("Loop times:" + (System.currentTimeMillis() - startedAt) + "ms");
    }
}
伪共享
使用了填充机制的 ValuePadding 比没有使用填充机制的 ValueNoPadding 速度快了4倍左右(下面的代码默认使用 ValueNoPadding,把 longs 数组的元素类型换成 ValuePadding 即可对比)。为了提升性能,我们故意在对象属性中添加了padding,防止多线程竞争时导致缓存行不断失效,Disruptor也是采用这种方式填充数组
/**
 * Benchmark in which each thread repeatedly writes the volatile {@code value} of its own
 * element of a shared array, run with 1 to 10 threads.
 *
 * <p>The array element type is {@link ValueNoPadding}; {@link ValuePadding} is declared for
 * comparison but is not used by this code.
 */
public class FalseSharing implements Runnable {
    /** One slot per worker thread; replaced at the start of every test run. */
    private static ValueNoPadding[] longs;

    /** Index of the slot this worker writes to. */
    private int arrayIndex = 0;

    /**
     * @param arrayIndex index into the shared array that this worker updates
     */
    public FalseSharing(int arrayIndex) {
        this.arrayIndex = arrayIndex;
    }

    /**
     * Runs the benchmark with 1 to 10 threads and prints the duration of each run.
     *
     * @param args unused
     * @throws InterruptedException if interrupted while joining the worker threads
     */
    public static void main(String[] args) throws InterruptedException {
        for (int threadCount = 1; threadCount <= 10; threadCount++) {
            System.gc();
            final long startedAt = System.currentTimeMillis();
            runTest(threadCount);
            final long elapsed = System.currentTimeMillis() - startedAt;
            System.out.println("Thread num:" + threadCount + "duration =" + elapsed);
        }
    }

    /**
     * Creates {@code num} slots and {@code num} workers, starts them all and waits for them.
     *
     * @param num number of worker threads (and slots)
     * @throws InterruptedException if interrupted while joining
     */
    private static void runTest(int num) throws InterruptedException {
        final Thread[] workers = new Thread[num];
        longs = new ValueNoPadding[num];

        // Allocate all slots back to back before any worker objects are created.
        int slot = 0;
        while (slot < longs.length) {
            longs[slot] = new ValueNoPadding();
            slot++;
        }

        // Each worker modifies only the slot matching its own index.
        int workerIndex = 0;
        while (workerIndex < workers.length) {
            workers[workerIndex] = new Thread(new FalseSharing(workerIndex));
            workerIndex++;
        }

        for (int k = 0; k < workers.length; k++) {
            workers[k].start();
        }
        for (int k = 0; k < workers.length; k++) {
            workers[k].join();
        }
    }

    /** Writes zero to this worker's slot 500 * 1000 * 100 times. */
    @Override
    public void run() {
        for (long remaining = 500L * 1000L * 100L; remaining != 0; remaining--) {
            longs[arrayIndex].value = 0L;
        }
    }

    /** Value holder with extra {@code long} fields declared before and after {@code value}. */
    public static class ValuePadding
    {
        protected long p1, p2, p3, p4, p5, p6, p7;
        protected volatile long value = 0L;
        protected long p9, p10, p11, p12, p13, p14;
        protected long p15;
    }

    /** Value holder containing only the volatile {@code value} field. */
    public static class ValueNoPadding
    {
        protected volatile long value = 0L;
    }
}
Disruptor使用方法
disruptor 可以指定多个生产者、消费者,下面是单个生产者、多消费者的例子,而且给其中一个消费者提供了异常处理类
import com.lmax.disruptor.RingBuffer;
import com.lmax.disruptor.YieldingWaitStrategy;
import com.lmax.disruptor.dsl.Disruptor;
import com.lmax.disruptor.dsl.ProducerType;
import java.util.concurrent.Executors;
/**
 * Entry point of the Disruptor usage example: one producer, two event handlers, and a
 * dedicated exception handler for one of them.
 */
public class DisruptorMain {
    /**
     * Wires up the disruptor, publishes the values 1..100 and then shuts it down.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Ring buffer capacity.
        final int ringBufferSize = 8;

        // Single-producer disruptor using the yielding wait strategy.
        final Disruptor<LongEvent> disruptor = new Disruptor<>(
            new LongEventFactory(),
            ringBufferSize,
            Executors.defaultThreadFactory(),
            ProducerType.SINGLE,
            new YieldingWaitStrategy());

        // Register both consumers; several handlers may be passed at once.
        final LongEventHandler longEventHandler = new LongEventHandler();
        disruptor.handleEventsWith(longEventHandler, new OtherEventHandler());

        // Exception handling is attached to longEventHandler only.
        disruptor.handleExceptionsFor(longEventHandler).with(new LongExceptionHandler());

        disruptor.start();

        // The producer publishes through the disruptor's ring buffer.
        final LongEventProducer producer = new LongEventProducer(disruptor.getRingBuffer());
        long value = 1;
        while (value <= 100) {
            producer.onData(value);
            value++;
        }

        disruptor.shutdown();
    }
}
import com.lmax.disruptor.EventFactory;
/**
 * {@link EventFactory} that produces empty {@link LongEvent} instances.
 */
public class LongEventFactory implements EventFactory<LongEvent> {
    /**
     * @return a newly constructed {@link LongEvent}
     */
    @Override
    public LongEvent newInstance() {
        final LongEvent blank = new LongEvent();
        return blank;
    }
}
/**
 * Mutable event carrying a single boxed {@code Long} payload.
 */
public class LongEvent {
    /** Payload; {@code null} until {@link #setValue(Long)} is called. */
    private Long value;

    /**
     * @return the current payload, or {@code null} if none has been set
     */
    public Long getValue() {
        return this.value;
    }

    /**
     * @param value the payload to store
     */
    public void setValue(Long value) {
        this.value = value;
    }
}
import com.lmax.disruptor.EventHandler;
//定义事件消费者
/**
 * Event consumer that prints the current thread name and the event's value.
 */
public class LongEventHandler implements EventHandler<LongEvent> {
    /**
     * Prints one line for the received event.
     *
     * @param event      the event to consume
     * @param sequence   sequence of the event (unused here)
     * @param endOfBatch end-of-batch flag (unused here)
     */
    @Override
    public void onEvent(final LongEvent event, final long sequence, final boolean endOfBatch) throws Exception {
        final String threadName = Thread.currentThread().getName();
        final Long payload = event.getValue();
        System.out.println(threadName + " long 消费者:" + payload);
    }
}
import com.lmax.disruptor.RingBuffer;
//定义生产者
/**
 * Publishes {@code long} values into a {@link RingBuffer} of {@link LongEvent}s.
 */
public class LongEventProducer
{
    private final RingBuffer<LongEvent> ringBuffer;

    /**
     * @param ringBuffer the ring buffer to publish into
     */
    public LongEventProducer(RingBuffer<LongEvent> ringBuffer)
    {
        this.ringBuffer = ringBuffer;
    }

    /**
     * Claims the next slot, stores {@code data} in it, publishes it, prints a line and then
     * pauses for 10 ms.
     *
     * <p>The claimed sequence is published from a {@code finally} block so that a failure
     * while filling the slot cannot leave it unpublished. Runtime failures are still only
     * reported via the stack trace, as before; an interruption during the pause now restores
     * the thread's interrupt flag instead of being swallowed.
     *
     * @param data the value to publish
     */
    public void onData(long data)
    {
        // Claim the next slot of the ring buffer.
        final long sequence = ringBuffer.next();
        try
        {
            try
            {
                // Look up the event object that lives in the claimed slot and fill it in.
                LongEvent longEvent = ringBuffer.get(sequence);
                longEvent.setValue(data);
            }
            finally
            {
                // Always publish a claimed sequence, even if filling the slot failed.
                ringBuffer.publish(sequence);
            }
            System.out.println(Thread.currentThread().getName() + "生产者发送数据:" + data);
            Thread.sleep(10);
        }
        catch (InterruptedException e)
        {
            // Keep the interruption visible to the caller's thread.
            Thread.currentThread().interrupt();
        }
        catch (RuntimeException e)
        {
            // Best-effort demo behaviour kept from the original: report and carry on.
            e.printStackTrace();
        }
    }
}
import com.lmax.disruptor.ExceptionHandler;
/**
 * Exception handler that prints every reported failure to standard output.
 *
 * <p>Parameterized as {@code ExceptionHandler<Object>} rather than the raw type, so the
 * method signatures are unchanged while the raw-type usage is removed.
 */
public class LongExceptionHandler implements ExceptionHandler<Object>
{
    /**
     * Prints the exception together with the sequence and event it was raised for.
     *
     * @param ex       the exception that was reported
     * @param sequence the sequence being processed
     * @param event    the event being processed
     */
    @Override
    public void handleEventException(final Throwable ex, final long sequence, final Object event)
    {
        System.out.println("ex:" + ex + " sequence:" + sequence + " event:" + event);
    }

    /**
     * Prints an exception reported for start-up.
     *
     * @param ex the exception that was reported
     */
    @Override
    public void handleOnStartException(final Throwable ex)
    {
        System.out.println("start ex:" + ex);
    }

    /**
     * Prints an exception reported for shutdown.
     *
     * @param ex the exception that was reported
     */
    @Override
    public void handleOnShutdownException(final Throwable ex)
    {
        // Message previously read "shudown ex:" (misspelled).
        System.out.println("shutdown ex:" + ex);
    }
}
import com.lmax.disruptor.EventHandler;
/**
 * Second event consumer; prints the current thread name and the event's value.
 */
public class OtherEventHandler implements EventHandler<LongEvent> {
    /**
     * Prints one line for the received event.
     *
     * @param event      the event to consume
     * @param sequence   sequence of the event (unused here)
     * @param endOfBatch end-of-batch flag (unused here)
     */
    @Override
    public void onEvent(final LongEvent event, final long sequence, final boolean endOfBatch) throws Exception {
        final String threadName = Thread.currentThread().getName();
        final Long payload = event.getValue();
        System.out.println(threadName + " other 消费者:" + payload);
    }
}
disruptor 原理示例
为了更容易理解源码,下面做了一个超级简化版:单生产者、单消费者,并把可以优化的地方标注了一下
消费者的等待策略
名称 | 措施 | 适用场景 |
---|---|---|
BlockingWaitStrategy | 加锁 | CPU资源紧缺,吞吐量和延迟并不重要的场景 |
BusySpinWaitStrategy | 自旋 | 通过不断重试,减少切换线程导致的系统调用,而降低延迟。推荐在线程绑定到固定的CPU的场景下使用 |
PhasedBackoffWaitStrategy | 自旋 + yield + 自定义策略 | CPU资源紧缺,吞吐量和延迟并不重要的场景 |
SleepingWaitStrategy | 自旋 + yield + sleep | 性能和CPU资源之间有很好的折中。延迟不均匀 |
TimeoutBlockingWaitStrategy | 加锁,有超时限制 | CPU资源紧缺,吞吐量和延迟并不重要的场景 |
YieldingWaitStrategy | 自旋 + yield + 自旋 | 性能和CPU资源之间有很好的折中。延迟比较均匀 |
/**
 * Heavily simplified single-producer / single-consumer sketch of the Disruptor idea: an
 * 8-slot array plus a producer sequence and a consumer sequence.
 *
 * <p>The producer does not wait for the consumer, so with 20 messages and 8 slots older
 * entries can be overwritten before they are read; the sketch deliberately leaves that out.
 */
public class DisruptorDemo
{
    /** Number of slots in the ring. */
    private static final int BUFFER_SIZE = 8;

    // The real implementation pads the array to avoid cache-line effects.
    private final int[] entries = new int[BUFFER_SIZE];

    // Producer sequence. Author's notes: the real implementation does not need volatile here
    // because consumers read it through VarHandle fences; with multiple producers there is
    // also an "available buffer" array marking which slots hold data. As a long it would take
    // roughly 300 thousand years to exhaust even at one million messages per second.
    private volatile long sequence = -1L;

    // Consumer sequence; touched only by the consumer thread. With multiple consumers the
    // real implementation keeps one Sequence per consumer.
    private long minSequence = -1L;

    /**
     * Starts a thread that writes the values 1..20 into the ring.
     *
     * <p>Each value is stored in its slot first and only then is the volatile
     * {@code sequence} advanced, so a consumer that observes the new sequence also observes
     * the slot contents.
     */
    private void producer()
    {
        new Thread(() ->
        {
            for (int i = 1; i <= 20; i++)
            {
                // With multiple producers the next sequence would be claimed via CAS.
                final long next = sequence + 1;
                // A full implementation would first check that the consumer is done with
                // this slot; the sketch skips that.
                entries[(int) (next % BUFFER_SIZE)] = i;
                // Publish: the volatile write makes the slot write above visible to readers.
                sequence = next;
                System.out.println("发送消息:" + i);
            }
        }).start();
    }

    /**
     * Starts a daemon thread that spins until the producer sequence reaches the next
     * consumer sequence and then prints the corresponding slot, forever.
     *
     * <p>The thread is a daemon so that it does not keep the JVM alive once {@code main}
     * returns.
     */
    private void consumer()
    {
        final Thread worker = new Thread(() ->
        {
            while (true)
            {
                minSequence++;
                // Wait strategy: plain busy spin until the producer has published this sequence.
                while (minSequence > sequence)
                {
                }
                // With multiple producers the highest usable sequence would come from the
                // available buffer instead.
                System.out.println("消费者:" + entries[(int) (minSequence % BUFFER_SIZE)]);
            }
        });
        worker.setDaemon(true);
        worker.start();
    }

    /**
     * Starts the producer and the consumer and gives them one second to run.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while sleeping
     */
    public static void main(String[] args) throws InterruptedException
    {
        DisruptorDemo demo = new DisruptorDemo();
        demo.producer();
        demo.consumer();
        Thread.sleep(1000);
    }
}
源码解析
Disruptor.java
多个消费者 创建多个BatchEventProcessor
/**
 * Creates one {@code BatchEventProcessor} per event handler, all sharing a single barrier
 * built from {@code barrierSequences}, and registers each with the consumer repository.
 *
 * @param barrierSequences sequences the new processors' barrier is built from
 * @param eventHandlers    handlers to wrap, one processor each
 * @return a group built from the new processors' sequences
 */
EventHandlerGroup<T> createEventProcessors(
    final Sequence[] barrierSequences,
    final EventHandler<? super T>[] eventHandlers)
{
    checkNotStarted();

    final int handlerCount = eventHandlers.length;
    final Sequence[] processorSequences = new Sequence[handlerCount];
    final SequenceBarrier barrier = ringBuffer.newBarrier(barrierSequences);

    // One processor per consumer.
    for (int index = 0; index < handlerCount; index++)
    {
        final EventHandler<? super T> handler = eventHandlers[index];
        final BatchEventProcessor<T> processor = new BatchEventProcessor<>(ringBuffer, barrier, handler);

        // Propagate the configured exception handler, if there is one.
        if (exceptionHandler != null)
        {
            processor.setExceptionHandler(exceptionHandler);
        }

        consumerRepository.add(processor, handler, barrier);
        processorSequences[index] = processor.getSequence();
    }

    updateGatingSequencesForNextInChain(barrierSequences, processorSequences);

    return new EventHandlerGroup<>(this, consumerRepository, processorSequences);
}
SingleProducerSequencerPad.java
生产者获取下一个sequence
//long sequence = ringBuffer.next();
/**
 * Claims the next {@code n} sequences for the producer and returns the highest one claimed.
 *
 * <p>While the claim would run more than {@code bufferSize} ahead of the minimum gating
 * sequence, the calling thread parks for 1 ns at a time and re-checks.
 *
 * @param n number of sequences to claim
 * @return the last sequence of the claimed range ({@code nextValue + n})
 * @throws IllegalArgumentException if {@code n < 1} or {@code n > bufferSize}
 */
public long next(final int n)
{
// NOTE(review): the check accepts n == bufferSize, but the message below says "< bufferSize".
if (n < 1 || n > bufferSize)
{
throw new IllegalArgumentException("n must be > 0 and < bufferSize");
}
long nextValue = this.nextValue;
long nextSequence = nextValue + n;
// Sequence exactly one buffer length behind the new claim.
long wrapPoint = nextSequence - bufferSize;
long cachedGatingSequence = this.cachedValue;
// Only consult the gating sequences when the cached minimum cannot prove there is room.
if (wrapPoint > cachedGatingSequence || cachedGatingSequence > nextValue)
{
cursor.setVolatile(nextValue); // StoreLoad fence
// Check whether the consumers have finished with the data that is about to be reused
long minSequence;
while (wrapPoint > (minSequence = Util.getMinimumSequence(gatingSequences, nextValue)))
{
LockSupport.parkNanos(1L); // TODO: Use waitStrategy to spin?
}
// Remember the minimum just observed so later calls can skip the scan.
this.cachedValue = minSequence;
}
this.nextValue = nextSequence;
return nextSequence;
}
RingBufferFields.java
初始化数组
// Static initializer: derives the element shift, the padding length (in elements) and the
// padded base offset from the Object[] layout reported by UNSAFE.
static
{
// Address increment between consecutive elements of an Object[]
final int scale = UNSAFE.arrayIndexScale(Object[].class); // author's note: 4 bytes on their JVM
if (POINTER_SIZE_32_BIT == scale)
{
// NOTE(review): the original note here read "4"; the constant's value is not visible in this excerpt — confirm.
REF_ELEMENT_SHIFT = BITSHIFT_MULTIPLIER_FOUR;
}
else if (POINTER_SIZE_64_BIT == scale)
{
REF_ELEMENT_SHIFT = BITSHIFT_MULTIPLIER_EIGHT;
}
else
{
throw new IllegalStateException("Unknown pointer size");
}
// Number of padding elements: padding bytes divided by the element size (author's example: 32 = 128 / 4)
BUFFER_PAD = BUFFER_PADDING_BYTES / scale; // author's example value: 32
// Including the buffer pad in the array base offset
// UNSAFE.arrayBaseOffset returns the offset of the first element of the array
REF_ARRAY_BASE = UNSAFE.arrayBaseOffset(Object[].class) + BUFFER_PADDING_BYTES; // author's example: 144 = 16 + 128 (bytes)
}
RingBufferPad.java
生产者取出要存入数据所在的内存位置
//获取下一个槽的内存地址
//LongEvent longEvent = ringBuffer.get(sequence);
//往对应的地址设置值
//longEvent.setValue(data);
/**
 * Reads the entry stored for {@code sequence} directly from the backing array via UNSAFE.
 *
 * @param sequence the sequence whose slot should be read
 * @return the object stored in that slot, cast to {@code E}
 */
@SuppressWarnings("unchecked")
protected final E elementAt(final long sequence)
{
    // Wrap the sequence into a slot index using the mask.
    final long slot = sequence & indexMask;
    // Byte offset of that slot: padded base plus slot index scaled by the element shift
    // (author's example base: 144).
    final long byteOffset = REF_ARRAY_BASE + (slot << REF_ELEMENT_SHIFT);
    return (E) UNSAFE.getObject(entries, byteOffset);
}
SingleProducerSequencerPad.java
//通知消费者目前最新的消费位置点位置,并唤醒线程
//ringBuffer.publish(sequence);
/**
 * Publishes {@code sequence}: stores it in the cursor, then signals the wait strategy.
 *
 * @param sequence the sequence to publish
 */
@Override
public void publish(final long sequence)
{
// Store the sequence in the cursor, i.e. tell consumers the position they may now consume up to
cursor.set(sequence);
// Wake up any consumer threads that are blocked waiting
waitStrategy.signalAllWhenBlocking();
}
消费者线程处理逻辑
BatchEventProcessor.java
// Consumer loop (excerpt — the quoted method is cut off before its catch blocks and closing
// braces): waits for available sequences, hands each event to the handler, then records progress.
private void processEvents()
{
T event = null;
// Each consumer keeps track of its own consumed position
long nextSequence = sequence.get() + 1L;
while (true)
{
final long startOfBatchSequence = nextSequence;
try
{
try
{
// Ask the barrier for the highest usable sequence; per the article, depending on the
// wait strategy the thread may spin or block here
final long availableSequence = sequenceBarrier.waitFor(nextSequence);
// Report the batch size to the batch-start listener, if one is set and the batch is non-empty
if (batchStartAware != null && availableSequence >= nextSequence)
{
batchStartAware.onBatchStart(availableSequence - nextSequence + 1);
}
// Consume only while the next sequence has not passed the available sequence
while (nextSequence <= availableSequence)
{ // Fetch the user-defined event object
event = dataProvider.get(nextSequence);
// Invoke the user-supplied handler; the last argument is true for the final event of the batch
eventHandler.onEvent(event, nextSequence, nextSequence == availableSequence);
nextSequence++;
}
retriesAttempted = 0;
// Record that everything up to availableSequence has been processed
sequence.set(availableSequence);
}
Sequence.java
设置值、获取值 采用jdk9新的内存屏障VarHandle
/**
 * Reads the current value, then issues an acquire fence before returning it.
 *
 * @return the value read from the {@code value} field
 */
public long get()
{
long value = this.value;
VarHandle.acquireFence();// memory fence
return value;
}
/**
 * Issues a release fence, then writes the new value.
 *
 * @param value the value to store in the {@code value} field
 */
public void set(final long value)
{
VarHandle.releaseFence();// memory fence
this.value = value;
}
多线程sequence
MultiProducerSequencer.java
public MultiProducerSequencer(final int bufferSize, final WaitStrategy waitStrategy)
{
super(bufferSize, waitStrategy);
//创建availableBuffer 并填充-1,用来标记是否有数据
availableBuffer = new int[bufferSize];
Arrays.fill(availableBuffer, -1);
indexMask = bufferSize - 1;
indexShift = Util.log2(bufferSize);
}
/**
 * Claims the next {@code n} sequences and returns the highest one claimed.
 *
 * <p>The range is reserved by atomically adding {@code n} to the cursor. While the claim
 * would run more than {@code bufferSize} ahead of the minimum gating sequence, the calling
 * thread parks for 1 ns at a time and re-checks.
 *
 * @param n number of sequences to claim
 * @return the last sequence of the claimed range ({@code current + n})
 * @throws IllegalArgumentException if {@code n < 1} or {@code n > bufferSize}
 */
@Override
public long next(final int n)
{
// NOTE(review): the check accepts n == bufferSize, but the message below says "< bufferSize".
if (n < 1 || n > bufferSize)
{
throw new IllegalArgumentException("n must be > 0 and < bufferSize");
}
// Atomically reserve an exclusive range of n sequences (author's note: "via CAS")
long current = cursor.getAndAdd(n);
long nextSequence = current + n;
// Sequence exactly one buffer length behind the new claim.
long wrapPoint = nextSequence - bufferSize;
long cachedGatingSequence = gatingSequenceCache.get();
// Check whether the consumers have finished with the data that is about to be reused
if (wrapPoint > cachedGatingSequence || cachedGatingSequence > current)
{
long gatingSequence;
while (wrapPoint > (gatingSequence = Util.getMinimumSequence(gatingSequences, current)))
{
LockSupport.parkNanos(1L); // TODO, should we spin based on the wait strategy?
}
// Remember the minimum just observed so later calls can skip the scan.
gatingSequenceCache.set(gatingSequence);
}
return nextSequence;
}
/**
 * Finds the highest sequence in {@code [lowerBound, availableSequence]} up to which every
 * sequence is available. With multiple producers the cursor alone is not enough, so each
 * position is checked with {@code isAvailable}.
 *
 * @param lowerBound        first sequence to check
 * @param availableSequence last sequence to check
 * @return the sequence just before the first unavailable one, or {@code availableSequence}
 *         when no unavailable sequence is found in the range
 */
@Override
public long getHighestPublishedSequence(final long lowerBound, final long availableSequence)
{
    long candidate = lowerBound;
    // Advance past every position that is already available.
    while (candidate <= availableSequence && isAvailable(candidate))
    {
        candidate++;
    }

    if (candidate <= availableSequence)
    {
        // Stopped on the first unavailable position: report the one before it.
        return candidate - 1;
    }
    return availableSequence;
}
参考文档
高性能队列——Disruptor
高性能无锁并发框架Disruptor
LMAX Disruptor简介
LMAX Disruptor 原理
说说Java的Unsafe类
JAVA中神奇的双刃剑–Unsafe
JUC整理笔记四之梳理VarHandle
Java高级进阶多线程学习之路(五)cache line 缓存行