import java.util.concurrent.Exchanger;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
 * Minimal {@link Exchanger} demonstration: a producer hands the values 1..3 to a
 * consumer, one per exchange, and receives the consumer's value (always 0) in return.
 *
 * <p>Both tasks stop as soon as their thread is interrupted, and the consumer never
 * blocks indefinitely once the producer has finished, so {@link #main} always returns.
 */
public class ExchangerTest {
    /** Set by the producer when it stops (normally or not); polled by the consumer. */
    private static volatile boolean isDone = false;

    /** How long the consumer waits for a partner before it re-checks {@link #isDone}. */
    private static final long EXCHANGE_POLL_MILLIS = 200L;

    /** Offers 1, 2, 3 to the exchanger, pausing one second before each offer. */
    static class ExchangerProducer implements Runnable {
        private final Exchanger<Integer> exchanger;
        // Per-task state: kept as an instance field so that two producers never share it.
        private int data = 1;

        ExchangerProducer(Exchanger<Integer> exchanger) {
            this.exchanger = exchanger;
        }

        @Override
        public void run() {
            try {
                for (int i = 1; i <= 3 && !isDone; i++) {
                    TimeUnit.SECONDS.sleep(1);
                    data = i;
                    System.out.println("Producer before: " + data);
                    data = exchanger.exchange(data);
                    System.out.println("Producer after: " + data);
                }
            } catch (InterruptedException e) {
                // Stop producing and keep the interrupt visible to the owner of this thread.
                Thread.currentThread().interrupt();
            } finally {
                // Always release the consumer, even when the producer stops early.
                isDone = true;
            }
        }
    }

    /** Repeatedly offers 0 to the exchanger and prints whatever it receives back. */
    static class ExchangerConsumer implements Runnable {
        private final Exchanger<Integer> exchanger;
        // Per-task state: kept as an instance field so that two consumers never share it.
        private int data = 0;

        ExchangerConsumer(Exchanger<Integer> exchanger) {
            this.exchanger = exchanger;
        }

        @Override
        public void run() {
            try {
                while (!isDone) {
                    data = 0;
                    System.out.println("Consumer before: " + data);
                    TimeUnit.SECONDS.sleep(1);
                    Integer received = exchangeUnlessDone(data);
                    if (received == null) {
                        // The producer finished while we were waiting: nothing left to receive.
                        break;
                    }
                    data = received;
                    System.out.println("Consumer after: " + data);
                }
            } catch (InterruptedException e) {
                // Stop consuming and keep the interrupt visible to the owner of this thread.
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Exchanges {@code offered} with the producer, waiting in short slices so that a
         * finished producer cannot leave this thread blocked forever.
         *
         * @param offered value handed to the partner thread
         * @return the partner's value, or {@code null} if {@code isDone} became true
         *         before a partner arrived
         * @throws InterruptedException if the current thread is interrupted while waiting
         */
        private Integer exchangeUnlessDone(int offered) throws InterruptedException {
            while (!isDone) {
                try {
                    return exchanger.exchange(offered, EXCHANGE_POLL_MILLIS, TimeUnit.MILLISECONDS);
                } catch (TimeoutException e) {
                    // No partner showed up in this slice; loop to re-check isDone.
                }
            }
            return null;
        }
    }

    /**
     * Runs one producer and one consumer on a cached thread pool and waits up to
     * 30 seconds for both to finish; tasks still running after that are interrupted.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ExecutorService exec = Executors.newCachedThreadPool();
        Exchanger<Integer> exchanger = new Exchanger<Integer>();
        ExchangerProducer producer = new ExchangerProducer(exchanger);
        ExchangerConsumer consumer = new ExchangerConsumer(exchanger);
        exec.execute(producer);
        exec.execute(consumer);
        exec.shutdown();
        try {
            if (!exec.awaitTermination(30, TimeUnit.SECONDS)) {
                // Non-daemon pool threads would otherwise keep the JVM alive.
                exec.shutdownNow();
            }
        } catch (InterruptedException e) {
            exec.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }
}
当一个线程到达exchange调用点时,如果它的伙伴线程此前已经调用了此方法,
那么它的伙伴会被调度唤醒并与之进行对象交换,然后各自返回。
如果它的伙伴还没到达交换点,那么当前线程将会被挂起,直至伙伴线程到达——完成交换正常返回;
或者当前线程被中断——抛出中断异常(InterruptedException);又或者是等候超时——抛出超时异常(TimeoutException,仅在调用带超时参数的exchange(x, timeout, unit)重载时才会发生)。
1.问题描述
最近接到外部项目组向我组提出的接口需求,需要查询我们业务办理量的统计情况。我们系统目前的情况是,有一个日增长十多万、总数据量为千万级别的业务办理明细表(xxx_info),每人次的业务办理结果会实时写入其中。以往对外提供的业务统计接口是在每次被调用时候在明细表中执行SQL查询(select、count、where、group by等),响应时间很长,对原生产业务的使用也有很大的影响。于是我决定趁着这次新增接口的上线机会对系统进行优化。
2.优化思路
首先是在明细表之外再建立一个数据统计(xxx_statistics)表,考虑到目前数据库的压力以及公司内部质管流控等因素,暂没有分库存放,仍旧与原明细表放在同一个库。再设置一个定时任务于每日凌晨对明细表进行查询、过滤、统计、排序等操作,把统计结果插入到统计表中。然后对外暴露统计接口查询统计报表。现在的设计与原来的实现相比,虽然牺牲了统计表所占用的少量额外的存储空间(每日新增的十来万条业务办理明细记录经过处理最终会变成几百条统计表的记录),但是却能把select、count这样耗时的数据统计操作放到凌晨时段执行以避开白天的业务办理高峰,分表处理能够大幅降低对生产业务明细表的性能影响,而对外提供的统计接口的查询速度也将得到几个数量级的提升。当然,还有一个缺点是,不能实时提供当天的统计数据,不过这也是双方可以接受的。
3.设计实现
设计一个定时任务,每日凌晨执行。在定时任务中启动两个线程,一个线程负责对业务明细表(xxx_info)进行查询统计,把统计的结果放置在内存缓冲区,另一个线程负责读取缓冲区中的统计结果并插入到业务统计表(xxx_statistics)中。
亲,这样的场景是不是听起来很有感觉?没错!两个线程在内存中批量交换数据,这个事情我们可以使用Exchanger去做!我们马上来看看代码如何实现。
生产者线程:
/**
 * Producer half of the statistics job: queries statistics city by city, merges them
 * into {@code holder}, and swaps the buffer with the partner thread through the
 * {@link Exchanger} whenever it holds at least {@code threshold} entries or the last
 * city has been processed.
 *
 * <p>Relies on names declared outside this class: {@code isDone}, {@code allCities},
 * {@code internationalList}, {@code xxxDao}, {@code calculationCounter}, {@code log},
 * {@code mergeLists} and {@code mergeListToSet}.
 */
class ExchangerProducer implements Runnable {
    private Exchanger<Set<XXXStatistics>> exchanger;
    private Set<XXXStatistics> holder;
    private Date fltDate;
    private int threshold;

    /**
     * @param exchanger channel shared with the consuming task
     * @param holder    initial buffer that collected statistics are merged into
     * @param fltDate   date passed to both DAO queries
     * @param threshold buffer size at which the buffer is handed over
     */
    ExchangerProducer(Exchanger<Set<XXXStatistics>> exchanger,
            Set<XXXStatistics> holder, Date fltDate, int threshold) {
        this.exchanger = exchanger;
        this.holder = holder;
        this.fltDate = fltDate;
        this.threshold = threshold;
    }

    @Override
    public void run() {
        try {
            // isInterrupted() leaves the flag set, so a pending interrupt is not lost.
            if (!Thread.currentThread().isInterrupted() && !isDone) {
                for (int i = 0; i < allCities.size(); i++) {
                    collectCity(i);
                    // Insert every ${threshold} or the last into database.
                    if (holder.size() >= threshold
                            || i == (allCities.size() - 1)) {
                        log.info("data collected: \n" + holder);
                        holder = exchanger.exchange(holder);
                        log.info("data submitted");
                    }
                }
            }
            log.info("calculation job done, calculated: "
                    + calculationCounter.get());
        } catch (InterruptedException e) {
            log.error(e, e);
            // Restore the flag so the executor / caller can observe the interruption.
            Thread.currentThread().interrupt();
        } finally {
            // Mark the job finished on every exit path (normal end, interrupt, runtime
            // failure) so the partner is never left waiting for a producer that is gone.
            isDone = true;
            exchanger = null;
            if (holder != null) {
                holder.clear();
            }
            holder = null;
            fltDate = null;
        }
    }

    /**
     * Runs both DAO queries for the city at {@code cityIndex} and merges the results
     * into {@code holder}. Any exception is logged and the city is skipped, so one
     * failing city does not abort the whole run.
     */
    private void collectCity(int cityIndex) {
        try {
            List<XXXStatistics> primary = xxxDao.findStatistics1(
                    fltDate, allCities.get(cityIndex));
            List<XXXStatistics> secondary = xxxDao.findStatistics2(
                    fltDate, allCities.get(cityIndex), internationalList);
            if (primary != null && !primary.isEmpty()) {
                // The counter is advanced by the size of the first result list only.
                calculationCounter.addAndGet(primary.size());
                if (secondary != null && !secondary.isEmpty()) {
                    // merge two lists into the primary one
                    mergeLists(primary, secondary);
                    secondary.clear();
                }
                // merge the primary list into the holder set
                mergeListToSet(holder, primary);
                primary.clear();
            }
        } catch (Exception e) {
            log.error(e, e);
        }
    }
}
代码说明:
- threshold:缓冲区的容量阈值;
- allCities:城市列表,迭代这个列表作为入参来执行查询统计;
- XXXStatistics:统计数据封装实体类,实现了Serializable和Comparable接口,覆写equals和compareTo方法,以利用TreeSet提供的去重和排序处理;
- isDone:volatile boolean,标识统计任务是否完成;
- holder:TreeSet<XXXStatistics>,存放统计结果的内存缓冲区,容量达到阈值后提交给Exchanger执行exchange操作;
- xxxDao.findStatistics1,xxxDao.findStatistics2:简化的数据库查询统计操作,此处仅供示意;
- calculationCounter:AtomicInteger,标记生产端所提交的记录总数;
- mergeLists,mergeListToSet:内部私有工具方法,把dao查询返回的列表合并到holder中;
消费者线程:
/**
 * Consumer half of the statistics job: swaps its buffer with the partner thread
 * through the {@link Exchanger}, batch-inserts whatever it received, clears the
 * buffer and offers it back on the next swap.
 *
 * <p>Relies on names declared outside this class: {@code isDone}, {@code xxxDao},
 * {@code insertionCounter}, {@code log} and {@code convertSetToList}.
 */
class ExchangerConsumer implements Runnable {
    /** Length of one waiting slice before {@code isDone} is checked again. */
    private static final long EXCHANGE_POLL_MILLIS = 500L;

    private Exchanger<Set<XXXStatistics>> exchanger;
    private Set<XXXStatistics> holder;

    /**
     * @param exchanger channel shared with the producing task
     * @param holder    initial buffer offered on the first exchange
     */
    ExchangerConsumer(Exchanger<Set<XXXStatistics>> exchanger,
            Set<XXXStatistics> holder) {
        this.exchanger = exchanger;
        this.holder = holder;
    }

    @Override
    public void run() {
        try {
            // isInterrupted() leaves the flag set, so a pending interrupt is not lost.
            while (!Thread.currentThread().isInterrupted() && !isDone) {
                if (!swapBuffers()) {
                    // isDone was raised while waiting: no further buffer will arrive.
                    break;
                }
                log.info("got data: \n" + holder);
                if (holder != null && !holder.isEmpty()) {
                    try {
                        // insert data into database
                        List<XXXStatistics> batch = convertSetToList(holder);
                        insertionCounter.addAndGet(xxxDao
                                .batchInsertXXXStatistics(batch));
                        batch.clear();
                    } catch (Exception e) {
                        log.error(e, e);
                    }
                    // clear the set
                    holder.clear();
                } else {
                    log.info("wtf, got an empty list");
                }
                log.info("data processed");
            }
            log.info("insert job done, inserted: " + insertionCounter.get());
        } catch (InterruptedException e) {
            log.error(e, e);
            // Restore the flag so the executor / caller can observe the interruption.
            Thread.currentThread().interrupt();
        } finally {
            exchanger = null;
            if (holder != null) {
                holder.clear();
            }
            holder = null;
        }
    }

    /**
     * Swaps {@code holder} with the partner's buffer, waiting in bounded slices.
     * An untimed exchange could block forever if this thread re-entered the loop
     * just before {@code isDone} was set after the final hand-over.
     *
     * @return {@code true} if a buffer was received, {@code false} if {@code isDone}
     *         became true before a partner arrived
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    private boolean swapBuffers() throws InterruptedException {
        while (!isDone) {
            try {
                holder = exchanger.exchange(holder, EXCHANGE_POLL_MILLIS,
                        TimeUnit.MILLISECONDS);
                return true;
            } catch (TimeoutException e) {
                // No partner within this slice; loop to re-check isDone.
            }
        }
        return false;
    }
}
代码说明:
- convertSetToList:由于dao接口的限制,需把交换得到的Set转换为List;
- batchInsertXXXStatistics:使用jdbc4的batch update而实现的批量插入dao接口;
- insertionCounter:AtomicInteger,标记消费端插入成功的记录总数;
调度器代码:
/**
 * Runs one statistics job: starts a producer and a consumer task that trade
 * buffers through an {@link Exchanger}, waits up to
 * {@code calculationTimeoutMinutes} minutes for both to finish, and interrupts
 * them if they have not.
 *
 * @param fltDate date handed to the producer task
 * @return {@code true} only if both tasks finished in time, at least one record was
 *         counted by the producer side, and the producer and consumer counters match
 */
public boolean calculateStatistics(Date fltDate) {
    // initialization
    calculationCounter.set(0);
    insertionCounter.set(0);
    isDone = false;
    exec = Executors.newCachedThreadPool();
    Set<XXXStatistics> producerSet = new TreeSet<XXXStatistics>();
    Set<XXXStatistics> consumerSet = new TreeSet<XXXStatistics>();
    Exchanger<Set<XXXStatistics>> xc = new Exchanger<Set<XXXStatistics>>();
    ExchangerProducer producer = new ExchangerProducer(xc, producerSet,
            fltDate, threshold);
    ExchangerConsumer consumer = new ExchangerConsumer(xc, consumerSet);
    // execution
    exec.execute(producer);
    exec.execute(consumer);
    exec.shutdown();
    boolean isJobDone = false;
    try {
        // wait for termination
        isJobDone = exec.awaitTermination(calculationTimeoutMinutes,
                TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        log.error(e, e);
        // Keep the interruption visible to whoever scheduled this job.
        Thread.currentThread().interrupt();
    }
    if (!isJobDone) {
        // force shutdown
        exec.shutdownNow();
        log.error("time elapsed for "
                + calculationTimeoutMinutes
                + " minutes, but still not finished yet, shut it down anyway.");
    }
    // clean up
    exec = null;
    if (isJobDone) {
        // Only touch the buffers once both workers have terminated: shutdownNow()
        // merely requests interruption, so after a timeout the workers may still be
        // using these (non-thread-safe) TreeSets.
        producerSet.clear();
        consumerSet.clear();
    }
    // A hint only; the JVM is free to ignore it.
    System.gc();
    // return the result
    return isJobDone && calculationCounter.get() > 0
            && calculationCounter.get() == insertionCounter.get();
}
代码说明:
调度器的代码就四个步骤:初始化、提交任务并等候处理结果、清理、返回。初始化阶段使用了jdk提供的线程池提交生产者和消费者任务,设置了最长等候时间calculationTimeoutMinutes;如果任务执行超时,awaitTermination会返回false,如果调度器线程被中断,awaitTermination会抛出InterruptedException、isJobDone保持为false,这两种情况下都会强行关闭线程池并记录到日志。统计操作每日凌晨执行一次,所以在任务退出前的清理阶段建议jvm执行gc以尽早释放计算时所产生的垃圾对象。在结果返回阶段,如果任务按时完成、查询统计出来的记录条数大于0且与插入成功的条数相等则返回true,否则返回false。
4.小结
在这个案例中,使用Exchanger进行批量的双向数据交换可谓恰如其分:生产者在执行新的查询统计任务填入数据到缓冲区的同时,消费者正在批量插入生产者换入的上一次产生的数据,系统的吞吐量得到平滑的提升;计算复杂度、内存消耗、系统性能也能通过相关的参数设置而得到有效的控制(在消费端也可以对holder进行再次分割以控制每次批插入的大小,建议参阅数据库厂商以及数据库驱动包的说明文档以确定jdbc的最优batch update size);代码的实现也很简洁易懂。这些优点,是采用有界阻塞队列所难以达到的。
程序的输出结果与业务紧密相关,就不打印出来了。可以肯定的是,经过了一段时间的摸索调优,内存消耗、执行速度和处理结果还是比较满意的。
原文地址:http://lixuanbin.iteye.com/blog/2166772