SolrCloud中采用了DistributedQueue来同步节点间的状态信息。SolrCloud中总共会在3个地方保存队列信息:
/overseer/queue:保存每个shard的配置信息,以及状态信息(recovering,recovery_failed,active,down,sync)
对应的生产者为:ZKController中的overseerJobQueue
消费者:Overseer.ClusterStateUpdater中的stateUpdateQueue;
/overseer/queue-work:正在处理中的消息。shard的信息会先保存到/overseer/queue下面,进行处理时会移到/overseer/queue-work中,消息处理完之后再从/overseer/queue-work中删除
生产者:stateUpdateQueue
消费者:Overseer.ClusterStateUpdater中的workQueue
/overseer/collection-queue-work:只有在create,delete,reload collection的时候才会用到此队列,它只保存相应的collection操作信息。待collection操作成功之后,还会涉及到/overseer/queue和/overseer/queue-work这两个队列
生产者:ZKController中的overseerCollectionQueue
消费者:OverseerCollectionProcessor中的workQueue
DistributedQueue源码:
package org.apache.solr.cloud;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.TreeMap;
import java.util.concurrent.CountDownLatch;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* zookeeper可以通过Sequence Nodes来实现分布式队列
* 采用sequential在client在申请创建该节点时,zk会自动在节点路径末尾添加递增序号,
*/
public class DistributedQueue {
private static final Logger LOG = LoggerFactory.getLogger(DistributedQueue.class);
private final String dir; //队列的上层访问路径
private SolrZkClient zookeeper;
private List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE; // 访问控制列表,这里是一个完全打开的ACL,允许任何客户端对znode进行读写
private final String prefix = "qn-"; // 节点的名称前缀
public DistributedQueue(SolrZkClient zookeeper, String dir, List<ACL> acl) {
this.dir = dir;
if (acl != null) {
this.acl = acl;
}
this.zookeeper = zookeeper;
}
/**
* 对序列号进行排序,实现分布式队列的关键,保证了消息的有序性
*/
private TreeMap<Long,String> orderedChildren(Watcher watcher)
throws KeeperException, InterruptedException {
TreeMap<Long,String> orderedChildren = new TreeMap<Long,String>();
List<String> childNames = null;
try {
childNames = zookeeper.getChildren(dir, watcher, true); // 节点名称
} catch (KeeperException.NoNodeException e) {
throw e;
}
for (String childName : childNames) {
try {
if (!childName.regionMatches(0, prefix, 0, prefix.length())) {
LOG.warn("Found child node with improper name: " + childName);
continue;
}
String suffix = childName.substring(prefix.length());
Long childId = new Long(suffix); // 递增的序列号
orderedChildren.put(childId, childName);
} catch (NumberFormatException e) {
LOG.warn("Found child node with improper format : " + childName + " "
+ e, e);
}
}
return orderedChildren;
}
/**
* 返回队首元素
*/
public byte[] element() throws NoSuchElementException, KeeperException,
InterruptedException {
TreeMap<Long,String> orderedChildren;
while (true) {
try {
orderedChildren = orderedChildren(null);
} catch (KeeperException.NoNodeException e) {
throw new NoSuchElementException();
}
if (orderedChildren.size() == 0) throw new NoSuchElementException();
for (String headNode : orderedChildren.values()) {
if (headNode != null) {
try {
return zookeeper.getData(dir + "/" + headNode, null, null, true);
} catch (KeeperException.NoNodeException e) {
// Another client removed the node first, try next
}
}
}
}
}
/**
* 删除队首元素
*/
public byte[] remove() throws NoSuchElementException, KeeperException,
InterruptedException {
TreeMap<Long,String> orderedChildren;
// Same as for element. Should refactor this.
while (true) {
try {
orderedChildren = orderedChildren(null);
} catch (KeeperException.NoNodeException e) {
throw new NoSuchElementException();
}
if (orderedChildren.size() == 0) throw new NoSuchElementException();
for (String headNode : orderedChildren.values()) {
String path = dir + "/" + headNode;
try {
byte[] data = zookeeper.getData(path, null, null, true);
zookeeper.delete(path, -1, true);
return data;
} catch (KeeperException.NoNodeException e) {
// Another client deleted the node first.
}
}
}
}
/**
* zk的watch机制,没什么特别只是添加了个日志的debug
*/
private class LatchChildWatcher implements Watcher {
CountDownLatch latch;
public LatchChildWatcher() {
latch = new CountDownLatch(1);
}
public void process(WatchedEvent event) {
LOG.debug("Watcher fired on path: " + event.getPath() + " state: "
+ event.getState() + " type " + event.getType());
latch.countDown();
}
public void await() throws InterruptedException {
latch.await();
}
}
/**
* 出队操作
*/
public byte[] take() throws KeeperException, InterruptedException {
TreeMap<Long,String> orderedChildren;
// Same as for element. Should refactor this.
while (true) {
LatchChildWatcher childWatcher = new LatchChildWatcher();
try {
orderedChildren = orderedChildren(childWatcher);
} catch (KeeperException.NoNodeException e) {
zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
continue;
}
if (orderedChildren.size() == 0) { // 如果orderedChildren为0的话,则等待
childWatcher.await();
continue;
}
/**
* 对于失败的delete操作,client转向处理下一个node
*/
for (String headNode : orderedChildren.values()) {
String path = dir + "/" + headNode;
try {
byte[] data = zookeeper.getData(path, null, null, true);
zookeeper.delete(path, -1, true);
return data;
} catch (KeeperException.NoNodeException e) { // 这个删除操作有可能失败,因为可能有其他的消费者已经成功的获取该znode
// Another client deleted the node first.
}
}
// 如果最后还没有成功的delete一个item,则在重新orderedChildren()
}
}
/**
* 入队操作
* 不需要任何的锁来保证client对同一个znode的操作有序性。由zk负责按顺序分配序列号
*/
public boolean offer(byte[] data) throws KeeperException,
InterruptedException {
for (;;) {
try {
zookeeper.create(dir + "/" + prefix, data, acl, CreateMode.PERSISTENT_SEQUENTIAL, true);
return true;
} catch (KeeperException.NoNodeException e) {
try {
zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
} catch (KeeperException.NodeExistsException ne) {
//someone created it
}
}
}
}
/**
* 返回队首信息,如果队列为空,则返回null
*/
public byte[] peek() throws KeeperException, InterruptedException {
try {
return element();
} catch (NoSuchElementException e) {
return null;
}
}
/**
* block为true的时候,如果队列为空,则会一直阻塞,直到有数据返回
*/
public byte[] peek(boolean block) throws KeeperException, InterruptedException {
if (!block) {
return peek();
}
TreeMap<Long,String> orderedChildren;
while (true) {
LatchChildWatcher childWatcher = new LatchChildWatcher();
try {
orderedChildren = orderedChildren(childWatcher);
} catch (KeeperException.NoNodeException e) {
zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
continue;
}
if (orderedChildren.size() == 0) {
childWatcher.await();
continue;
}
for (String headNode : orderedChildren.values()) {
String path = dir + "/" + headNode;
try {
byte[] data = zookeeper.getData(path, null, null, true);
return data;
} catch (KeeperException.NoNodeException e) {
// Another client deleted the node first.
}
}
}
}
/**
* 删除队首,如果队列为空,则返回null
*/
public byte[] poll() throws KeeperException, InterruptedException {
try {
return remove();
} catch (NoSuchElementException e) {
return null;
}
}
public static void main(String[] args) throws KeeperException, InterruptedException {
SolrZkClient client = new SolrZkClient("localhost", 5*1000);
DistributedQueue queue = new DistributedQueue(client, "/overseer/queue", null);
queue.offer("test".getBytes());
System.out.println(new String(queue.take()));
}
}