假设有个任务是要对cassandra数据做迁移:我们先将数据写入kafka中,然后让另一方从kafka中消费数据,存到其他数据库中就行了。由于cassandra查询限制比较多,查询占用的cpu资源大,如果不停歇地查数据,肯定要死机。因此,我们想通过分页进行处理(cassandra分页是个麻烦事,有兴趣可以看看别人的博客),比如每页1000条。我们分别起两个线程CassandraTask和KafkaTask,中间通过ConcurrentBatchQueue将这两个线程解耦:写一批就读一批,依次进行下去。这样就能比较好地控制断点续传等操作了。
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/**
 * Describe: a blocking hand-off buffer that transfers elements between threads one
 * batch at a time.
 *
 * <p>At most one batch is pending at any moment: {@link #put(List)} blocks while the
 * buffer still holds elements, and {@link #take()} blocks while it is empty and then
 * removes everything that is pending in one go. All access to the buffer is guarded
 * by a single non-fair {@link ReentrantLock}.
 *
 * Author: JerryChii.
 * Date: 2016/9/21
 *
 * @param <E> the type of the elements carried in each batch
 */
public class ConcurrentBatchQueue<E> {
    /** Elements of the pending batch; empty when no batch is waiting to be taken. */
    final List<E> items;
    /** Main lock guarding all access */
    final ReentrantLock lock;
    /** Condition that producers wait on in {@code put} until the buffer has been drained. */
    private final Condition putProcess;
    /** Condition that consumers wait on in {@code take} until a batch is available. */
    private final Condition takeProcess;

    /** Creates an empty queue guarded by a non-fair lock. */
    public ConcurrentBatchQueue() {
        items = new ArrayList<>();
        lock = new ReentrantLock(false);
        putProcess = lock.newCondition();
        takeProcess = lock.newCondition();
    }

    /**
     * Removes and returns the pending batch, waiting if none is available yet.
     *
     * @return a new list holding every element that was pending; later changes to the
     *         queue do not affect it
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public List<E> take() throws InterruptedException {
        lock.lockInterruptibly();
        try {
            // Loop rather than "if": Condition.await() may wake up spuriously.
            while (items.isEmpty()) {
                takeProcess.await();
            }
            return dequeue();
        } finally {
            lock.unlock();
        }
    }

    /**
     * Stores a batch, waiting until the previously stored batch has been taken.
     *
     * <p>An empty list is accepted and leaves the buffer empty, so {@code take} keeps
     * waiting; callers that may produce empty batches repeatedly should filter them
     * out themselves if that matters to them.
     *
     * @param toPut the elements to hand over; must not be {@code null}
     * @throws NullPointerException if {@code toPut} is {@code null}
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public void put(List<E> toPut) throws InterruptedException {
        // Validate before blocking so a null argument fails immediately instead of
        // first waiting (possibly forever) for the buffer to drain.
        checkNotNull(toPut);
        lock.lockInterruptibly();
        try {
            // Loop rather than "if": Condition.await() may wake up spuriously.
            while (!items.isEmpty()) {
                putProcess.await();
            }
            enqueue(toPut);
        } finally {
            lock.unlock();
        }
    }

    /** Copies out and clears the pending batch, then wakes one waiting producer. Call only with the lock held. */
    private List<E> dequeue() {
        List<E> batch = new ArrayList<>(items);
        items.clear();
        putProcess.signal();
        return batch;
    }

    /** Appends the batch and wakes whichever side can make progress. Call only with the lock held. */
    private void enqueue(List<E> values) {
        checkNotNull(values);
        items.addAll(values);
        if (items.isEmpty()) {
            // Nothing was added, so no consumer can proceed. Pass the wake-up on to
            // another waiting producer; otherwise it would stay parked even though
            // the buffer is free.
            putProcess.signal();
        } else {
            takeProcess.signal();
        }
    }

    /** Throws {@link NullPointerException} if the argument is {@code null}. */
    private static void checkNotNull(Object v) {
        if (v == null) {
            throw new NullPointerException();
        }
    }
}