Java notify和notifyAll源码分析与性能对比
一、源码剖析
首先,看看在synchronizer.cpp中notify和notifyall的实现:
// Runtime-side notify for the object wrapped by `obj` (excerpt from
// synchronizer.cpp). Steps visible in this excerpt:
//   1. When UseBiasedLocking is enabled, revoke any bias on obj first; the
//      assert checks that the mark word no longer has a bias pattern.
//   2. If the mark word has a locker and the current thread owns that locker,
//      return without notifying anyone.
//      NOTE(review): presumably an object locked this way has never been
//      inflated and therefore cannot have waiters - confirm upstream.
//   3. Otherwise inflate obj to its ObjectMonitor and delegate to
//      ObjectMonitor::notify().
void ObjectSynchronizer::notify(Handle obj, TRAPS) {
if (UseBiasedLocking) {
BiasedLocking::revoke_and_rebias(obj, false, THREAD);
assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
}
// Read the mark word only after the bias (if any) has been revoked.
markOop mark = obj->mark();
if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) {
return;
}
ObjectSynchronizer::inflate(THREAD, obj())->notify(THREAD);
}
// NOTE: see comment of notify()
// Runtime-side notifyAll for the object wrapped by `obj`. The body mirrors
// ObjectSynchronizer::notify() statement for statement (bias revocation,
// early return when the current thread owns the mark word's locker); the only
// difference is the final call, which delegates to ObjectMonitor::notifyAll().
void ObjectSynchronizer::notifyall(Handle obj, TRAPS) {
if (UseBiasedLocking) {
BiasedLocking::revoke_and_rebias(obj, false, THREAD);
assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
}
// Read the mark word only after the bias (if any) has been revoked.
markOop mark = obj->mark();
if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) {
return;
}
ObjectSynchronizer::inflate(THREAD, obj())->notifyAll(THREAD);
}
两者的流程完全一致,区别只在最后一步分别调用ObjectMonitor的notify和notifyAll。接下来看看objectMonitor.cpp中这两个方法的实现。
notify:
// Moves at most one waiter out of this monitor's wait set.
//
// A single ObjectWaiter is taken with DequeueWaiter() while _WaitSetLock is
// held, and is then handed off according to Knob_MoveNotifyee ("Policy"):
//   0               - prepend to _EntryList
//   1               - append to _EntryList
//   2               - push onto the front of _cxq, unless _EntryList is empty,
//                     in which case the waiter becomes the only _EntryList node
//   3               - append to the tail of _cxq
//   any other value - set TS_RUN and unpark the waiter's ParkEvent directly
// Returns immediately when _WaitSet is NULL.
//
// Consider:
// If the lock is cool (cxq == null && succ == null) and we're on an MP system
// then instead of transferring a thread from the WaitSet to the EntryList
// we might just dequeue a thread from the WaitSet and directly unpark() it.
void ObjectMonitor::notify(TRAPS) {
// CHECK_OWNER is a macro defined outside this excerpt.
// NOTE(review): presumably it rejects callers that do not own the monitor - confirm.
CHECK_OWNER();
// Nothing is waiting: record the event and leave without taking _WaitSetLock.
if (_WaitSet == NULL) {
TEVENT (Empty-Notify) ;
return ;
}
DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
int Policy = Knob_MoveNotifyee ;
Thread::SpinAcquire (&_WaitSetLock, "WaitSet - notify") ;
// Only one waiter is dequeued; this is the key difference from notifyAll().
ObjectWaiter * iterator = DequeueWaiter() ;
if (iterator != NULL) {
TEVENT (Notify1 - Transfer) ;
// A freshly dequeued waiter must still be in TS_WAIT and not yet notified.
guarantee (iterator->TState == ObjectWaiter::TS_WAIT, "invariant") ;
guarantee (iterator->_notified == 0, "invariant") ;
// Default target state is TS_ENTER; the cxq and unpark branches below overwrite it.
if (Policy != 4) {
iterator->TState = ObjectWaiter::TS_ENTER ;
}
// Mark the waiter as notified and record the notifying thread's id.
iterator->_notified = 1 ;
Thread * Self = THREAD;
iterator->_notifier_tid = Self->osthread()->thread_id();
ObjectWaiter * List = _EntryList ;
if (List != NULL) {
assert (List->_prev == NULL, "invariant") ;
assert (List->TState == ObjectWaiter::TS_ENTER, "invariant") ;
assert (List != iterator, "invariant") ;
}
if (Policy == 0) { // prepend to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
List->_prev = iterator ;
iterator->_next = List ;
iterator->_prev = NULL ;
_EntryList = iterator ;
}
} else
if (Policy == 1) { // append to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
// CONSIDER: finding the tail currently requires a linear-time walk of
// the EntryList. We can make tail access constant-time by converting to
// a CDLL instead of using our current DLL.
ObjectWaiter * Tail ;
for (Tail = List ; Tail->_next != NULL ; Tail = Tail->_next) ;
assert (Tail != NULL && Tail->_next == NULL, "invariant") ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
}
} else
if (Policy == 2) { // prepend to cxq
// prepend to cxq
// Special case: with an empty EntryList the waiter is placed there instead of on _cxq.
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
iterator->TState = ObjectWaiter::TS_CXQ ;
// CAS push: retry until the waiter is installed as the new head of _cxq.
for (;;) {
ObjectWaiter * Front = _cxq ;
iterator->_next = Front ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
break ;
}
}
}
} else
if (Policy == 3) { // append to cxq
iterator->TState = ObjectWaiter::TS_CXQ ;
for (;;) {
ObjectWaiter * Tail ;
Tail = _cxq ;
if (Tail == NULL) {
// _cxq is empty: install the waiter with a CAS, retrying if _cxq changed meanwhile.
iterator->_next = NULL ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
break ;
}
} else {
// _cxq is non-empty: walk to its tail and link the waiter with plain stores (no CAS on this path).
while (Tail->_next != NULL) Tail = Tail->_next ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
break ;
}
}
} else {
// Any other Policy value: wake the waiter directly instead of queueing it.
// The fence sits between the TS_RUN store and unpark().
// NOTE(review): presumably so the woken thread observes TS_RUN - confirm.
ParkEvent * ev = iterator->_event ;
iterator->TState = ObjectWaiter::TS_RUN ;
OrderAccess::fence() ;
ev->unpark() ;
}
if (Policy < 4) {
iterator->wait_reenter_begin(this);
}
// _WaitSetLock protects the wait queue, not the EntryList. We could
// move the add-to-EntryList operation, above, outside the critical section
// protected by _WaitSetLock. In practice that's not useful. With the
// exception of wait() timeouts and interrupts the monitor owner
// is the only thread that grabs _WaitSetLock. There's almost no contention
// on _WaitSetLock so it's not profitable to reduce the length of the
// critical section.
}
Thread::SpinRelease (&_WaitSetLock) ;
// Call inc() on _sync_Notifications only if it exists and a waiter was actually dequeued.
if (iterator != NULL && ObjectMonitor::_sync_Notifications != NULL) {
ObjectMonitor::_sync_Notifications->inc() ;
}
}
notifyAll:
// Moves every waiter out of this monitor's wait set.
//
// While _WaitSetLock is held, DequeueWaiter() is called in a loop until it
// returns NULL. Each dequeued ObjectWaiter is handed off according to
// Knob_MoveNotifyee ("Policy"):
//   0               - prepend to _EntryList
//   1               - append to _EntryList
//   2               - push onto the front of _cxq (unlike notify(), there is no
//                     special case for an empty _EntryList here)
//   3               - append to the tail of _cxq
//   any other value - set TS_RUN and unpark the waiter's ParkEvent directly
// Returns immediately when _WaitSet is NULL. Tally counts the waiters moved and
// is passed to _sync_Notifications->inc() at the end.
void ObjectMonitor::notifyAll(TRAPS) {
// CHECK_OWNER is a macro defined outside this excerpt.
// NOTE(review): presumably it rejects callers that do not own the monitor - confirm.
CHECK_OWNER();
ObjectWaiter* iterator;
// Nothing is waiting: record the event and leave without taking _WaitSetLock.
if (_WaitSet == NULL) {
TEVENT (Empty-NotifyAll) ;
return ;
}
DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
int Policy = Knob_MoveNotifyee ;
int Tally = 0 ;
Thread::SpinAcquire (&_WaitSetLock, "WaitSet - notifyall") ;
// Drain the wait set: one loop iteration per waiter, all under the same _WaitSetLock hold.
for (;;) {
iterator = DequeueWaiter () ;
if (iterator == NULL) break ;
TEVENT (NotifyAll - Transfer1) ;
++Tally ;
// Disposition - what might we do with iterator ?
// a. add it directly to the EntryList - either tail or head.
// b. push it onto the front of the _cxq.
// For now we use (a).
// A freshly dequeued waiter must still be in TS_WAIT and not yet notified.
guarantee (iterator->TState == ObjectWaiter::TS_WAIT, "invariant") ;
guarantee (iterator->_notified == 0, "invariant") ;
// Mark the waiter as notified and record the notifying thread's id.
iterator->_notified = 1 ;
Thread * Self = THREAD;
iterator->_notifier_tid = Self->osthread()->thread_id();
// Default target state is TS_ENTER; the cxq and unpark branches below overwrite it.
if (Policy != 4) {
iterator->TState = ObjectWaiter::TS_ENTER ;
}
// _EntryList is re-read on every iteration because earlier iterations may have changed it.
ObjectWaiter * List = _EntryList ;
if (List != NULL) {
assert (List->_prev == NULL, "invariant") ;
assert (List->TState == ObjectWaiter::TS_ENTER, "invariant") ;
assert (List != iterator, "invariant") ;
}
if (Policy == 0) { // prepend to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
List->_prev = iterator ;
iterator->_next = List ;
iterator->_prev = NULL ;
_EntryList = iterator ;
}
} else
if (Policy == 1) { // append to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
// CONSIDER: finding the tail currently requires a linear-time walk of
// the EntryList. We can make tail access constant-time by converting to
// a CDLL instead of using our current DLL.
ObjectWaiter * Tail ;
for (Tail = List ; Tail->_next != NULL ; Tail = Tail->_next) ;
assert (Tail != NULL && Tail->_next == NULL, "invariant") ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
}
} else
if (Policy == 2) { // prepend to cxq
// prepend to cxq
iterator->TState = ObjectWaiter::TS_CXQ ;
// CAS push: retry until the waiter is installed as the new head of _cxq.
for (;;) {
ObjectWaiter * Front = _cxq ;
iterator->_next = Front ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
break ;
}
}
} else
if (Policy == 3) { // append to cxq
iterator->TState = ObjectWaiter::TS_CXQ ;
for (;;) {
ObjectWaiter * Tail ;
Tail = _cxq ;
if (Tail == NULL) {
// _cxq is empty: install the waiter with a CAS, retrying if _cxq changed meanwhile.
iterator->_next = NULL ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
break ;
}
} else {
// _cxq is non-empty: walk to its tail and link the waiter with plain stores (no CAS on this path).
while (Tail->_next != NULL) Tail = Tail->_next ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
break ;
}
}
} else {
// Any other Policy value: wake the waiter directly instead of queueing it.
// The fence sits between the TS_RUN store and unpark().
// NOTE(review): presumably so the woken thread observes TS_RUN - confirm.
ParkEvent * ev = iterator->_event ;
iterator->TState = ObjectWaiter::TS_RUN ;
OrderAccess::fence() ;
ev->unpark() ;
}
if (Policy < 4) {
iterator->wait_reenter_begin(this);
}
// _WaitSetLock protects the wait queue, not the EntryList. We could
// move the add-to-EntryList operation, above, outside the critical section
// protected by _WaitSetLock. In practice that's not useful. With the
// exception of wait() timeouts and interrupts the monitor owner
// is the only thread that grabs _WaitSetLock. There's almost no contention
// on _WaitSetLock so it's not profitable to reduce the length of the
// critical section.
}
Thread::SpinRelease (&_WaitSetLock) ;
// Add the number of moved waiters to _sync_Notifications, if that counter exists.
if (Tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
ObjectMonitor::_sync_Notifications->inc(Tally) ;
}
}
对比可知,在当前线程SpinAcquire到_WaitSetLock之后,notify只调用一次DequeueWaiter方法获取到一个等候者,而notifyAll需要循环调用DequeueWaiter直至WaitSet为空。此后的操作大致一样:根据Policy把等候者转移到EntryList或cxq中(Policy取其他值时则直接unpark该等候者),最后通过SpinRelease释放_WaitSetLock。
代码上看来,notify和notifyAll在性能上可能还是会有一定的差别。
二、测试验证
实际上到底有没有性能差异,有多少差别呢?我们可以用JMH来验证一下。
测试代码如下:
package co.speedar.infra.test;
import java.util.concurrent.CountDownLatch;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
/**
 * JMH benchmark comparing {@code Object.notify()} with {@code Object.notifyAll()}.
 *
 * <p>Each benchmark invocation starts {@code length} waiter threads that block in
 * {@code lock1.wait()}, then starts {@code length} notifier threads that each issue
 * one {@code notify()} or {@code notifyAll()}, and finally joins every thread so that
 * the measured time covers the complete wake-up rather than only the thread launches.
 */
@BenchmarkMode(value = { Mode.AverageTime, Mode.Throughput })
@State(Scope.Thread)
public class NotifyNotifyAllTest {
    /** Condition the waiters loop on; re-armed at the start of every invocation. */
    private volatile boolean wait1 = true;

    /** Monitor shared by the waiter and notifier threads of one state instance. */
    private final Object lock1 = new Object();

    /** Number of waiter threads (and of notifier threads) per invocation. */
    @Param({ "1", "10", "50" })
    private int length;

    /** Wakes the waiters with {@code notify()}, one waiter per notifier thread. */
    @Benchmark
    public void testNotify() throws InterruptedException {
        doTestNotify(length, false);
    }

    /** Wakes the waiters with {@code notifyAll()}. */
    @Benchmark
    public void testNotifyAll() throws InterruptedException {
        doTestNotify(length, true);
    }

    /**
     * Runs one wait/notify round.
     *
     * @param count number of waiter threads and of notifier threads to start
     * @param isAll {@code true} to call {@code notifyAll()}, {@code false} for {@code notify()}
     * @throws InterruptedException if the calling thread is interrupted while waiting
     *         on the latch or while joining the worker threads
     */
    private void doTestNotify(final int count, final boolean isAll) throws InterruptedException {
        // Re-arm the condition. This state object is reused across invocations, and the
        // first notifier sets wait1 to false; without this reset every later invocation
        // would find wait1 == false, no waiter would call wait(), and notify/notifyAll
        // would have nothing to wake.
        wait1 = true;
        final CountDownLatch latch = new CountDownLatch(count);

        class Waiter implements Runnable {
            public void run() {
                synchronized (lock1) {
                    // Counting down while holding lock1 guarantees that a notifier can only
                    // enter the monitor after this waiter has released it inside wait().
                    latch.countDown();
                    while (wait1) {
                        try {
                            lock1.wait();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        }
                    }
                }
            }
        }

        final Thread[] waiters = new Thread[count];
        for (int i = 0; i < count; i++) {
            waiters[i] = new Thread(new Waiter());
            waiters[i].start();
        }
        latch.await();

        class Notifier implements Runnable {
            public void run() {
                synchronized (lock1) {
                    if (wait1) {
                        wait1 = false;
                    }
                    if (isAll) {
                        lock1.notifyAll();
                    } else {
                        lock1.notify();
                    }
                }
            }
        }

        final Thread[] notifiers = new Thread[count];
        for (int i = 0; i < count; i++) {
            notifiers[i] = new Thread(new Notifier());
            notifiers[i].start();
        }

        // Wait for every thread to finish. Otherwise the method returns before the
        // waiters have actually been woken, and leftover threads leak into (and
        // contend with) subsequent invocations.
        for (Thread notifier : notifiers) {
            notifier.join();
        }
        for (Thread waiter : waiters) {
            waiter.join();
        }
    }

    /** Runs this benchmark class with 1 fork, 8 threads, 5 warm-up and 5 measurement iterations. */
    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder().include(NotifyNotifyAllTest.class.getSimpleName()).forks(1).threads(8)
                .warmupIterations(5).measurementIterations(5).build();
        new Runner(opt).run();
    }
}
每个Waiter线程启动后先获取锁并对latch执行countDown,然后进入wait状态;待所有Waiter都执行过countDown、latch.await()返回之后,再启动Notifier线程唤醒等待中的Waiter线程。执行结果如下:
Benchmark | (length) | Mode | Cnt | Score | Error | Units |
---|---|---|---|---|---|---|
NotifyNotifyAllTest.testNotify | 1 | thrpt | 5 | 14025.549 | ±150.552 | ops/s |
NotifyNotifyAllTest.testNotify | 10 | thrpt | 5 | 1449.138 | ±10.629 | ops/s |
NotifyNotifyAllTest.testNotify | 50 | thrpt | 5 | 288.984 | ±2.692 | ops/s |
NotifyNotifyAllTest.testNotifyAll | 1 | thrpt | 5 | 14235.773 | ±305.752 | ops/s |
NotifyNotifyAllTest.testNotifyAll | 10 | thrpt | 5 | 1452.429 | ±12.703 | ops/s |
NotifyNotifyAllTest.testNotifyAll | 50 | thrpt | 5 | 295.088 | ±2.200 | ops/s |
NotifyNotifyAllTest.testNotify | 1 | avgt | 5 | 0.001 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotify | 10 | avgt | 5 | 0.006 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotify | 50 | avgt | 5 | 0.027 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotifyAll | 1 | avgt | 5 | 0.001 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotifyAll | 10 | avgt | 5 | 0.006 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotifyAll | 50 | avgt | 5 | 0.028 | ±0.001 | s/op |
三、结论
可以看到,在等待线程数为1、10、50的三组测试中,notify和notifyAll的吞吐量和平均执行时间都没有太大的差别。需要注意的是,每次调用的耗时中也包含了创建和启动线程的开销,因此该结论仅针对这一测试场景:从性能角度来说,没必要用notify替换notifyAll。