solr API请求处理逻辑,reload,create等等使用的都是类似的流程,这个过程中牵扯到了zk的分布式队列以及solr中的overseer的概念。明白这个之后就可以添加自定义API。
概要
solr的API基本上可以分为两类。collection级别的API,比如创建collection时候的CREATE的请求。第二类就是core级别的API,collection创建的时候,最终请求是要创建core的。collection请求分解之后就变成了core级别的API。
solr中的overseer
简单说一下solrcloud的部署逻辑。假如一个集群部署了5个tomcat,每个tomcat中都部署了一个solrserver,那么集群如何实现呢?solr中的实现方式就是引入了zookeeper。每个solrserver都是一个可以单独运作的节点。每个节点都有overseer的功能,使用zookeeper实现一套选举机制,选出一个leader,由这个leader来接收处理请求。选举的具体方式是:每个overseer在zookeeper中进行一次注册,生成一个序列号,哪个overseer的序列号最小,哪个就是leader。leader节点挂了之后,再进行一次选举。
LeaderElector
选举利用了zookeeper的watch机制,可以简单地理解为一种监听:节点变动之后,会触发监听,进行重新选举。
// Decides whether this node won the current ZooKeeper election round.
// The participant that created the sequence node with the smallest sequence
// number is the leader; every other participant watches the node immediately
// before its own, so that only one watcher fires when a predecessor dies
// (this avoids a thundering herd of re-elections).
// NOTE: the "......" lines were elided from the original Solr source.
private void checkIfIamLeader(final ElectionContext context, boolean replacement) throws KeeperException,
InterruptedException, IOException {
......
......
if (leaderSeqNodeName.equals(seqs.get(0))) {
// I am the leader
// Both ShardLeaderElectionContext and OverseerElectionContext run through this election.
try {
runIamLeaderProcess(context, replacement); // if we are the leader, run the leader task, i.e. the Overseer logic
} catch (KeeperException.NodeExistsException e) {
log.error("node exists",e);
retryElection(context, false);
return;
}
} else {
// I am not the leader - watch the node below me
String toWatch = seqs.get(0);
for (String node : seqs) {
if (leaderSeqNodeName.equals(node)) {
break;
}
toWatch = node; // ends as the entry immediately preceding our own sequence node
}
try { // not the leader: register an ElectionWatcher; when the watched node disappears, ElectionWatcher.process() re-runs the election
String watchedNode = holdElectionPath + "/" + toWatch;
zkClient.getData(watchedNode, watcher = new ElectionWatcher(context.leaderSeqPath, watchedNode, getSeq(context.leaderSeqPath), context), null, true);
log.info("Watching path {} to know if I could be the leader", watchedNode);
} catch (KeeperException.SessionExpiredException e) {
throw e;
} catch (KeeperException.NoNodeException e) {
// the previous node disappeared, check if we are the leader again
checkIfIamLeader(context, true);
} catch (KeeperException e) {
// we couldn't set our watch for some other reason, retry
log.warn("Failed setting watch", e);
checkIfIamLeader(context, true);
}
}
}
OverseerElectionContext.runLeaderProcess在当选leader之后会启动Overseer线程,由它监听分布式队列workQueue中的请求。
/**
 * Runs once this node has won the Overseer election: publishes this node's id
 * at {@code leaderPath} as an EPHEMERAL znode (so the leadership claim vanishes
 * automatically when the ZK session dies) and then starts the Overseer thread
 * that consumes the distributed work queue.
 *
 * @param weAreReplacement   true when we became leader because a previous leader went away
 * @param pauseBeforeStartMs optional delay before starting the Overseer
 */
@Override
void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStartMs) throws KeeperException,
    InterruptedException {
  // NOTE(review): this log call resolves to the *field* id — the local `id`
  // declared below only shadows it from its declaration onward. Confirm
  // against the enclosing class; consider renaming the local for clarity.
  log.info("I am going to be the leader {}", id);
  final String id = leaderSeqPath
      .substring(leaderSeqPath.lastIndexOf("/") + 1);
  ZkNodeProps myProps = new ZkNodeProps("id", id);
  // EPHEMERAL: ZooKeeper removes the leader znode if this session expires.
  zkClient.makePath(leaderPath, Utils.toJSON(myProps),
      CreateMode.EPHEMERAL, true);
  if (pauseBeforeStartMs > 0) {
    try {
      Thread.sleep(pauseBeforeStartMs);
    } catch (InterruptedException e) {
      // Restore the interrupt flag rather than clearing it: Thread.interrupted()
      // swallows the interruption, hiding it from callers up the stack.
      Thread.currentThread().interrupt();
      log.warn("Wait interrupted ", e);
    }
  }
  overseer.start(id); // begin consuming the Overseer work queue
}
请求监听
OverSeer的工作就是监听WorkQueue,实现方式就是在zookeeper中创建一个节点,一旦有请求被添加到这个节点上,OverSeer就会被触发进行处理。
// Main Overseer loop: runs only on the elected leader. First blocks until
// leadership status is resolved, then loops forever pulling cluster-state
// update messages off the ZK-backed state update queue and dispatching them.
// NOTE: the "......" lines were elided from the original Solr source, so the
// braces in this excerpt do not balance.
public void run() {
LeaderStatus isLeader = amILeader();
while (isLeader == LeaderStatus.DONT_KNOW) {
log.debug("am_i_leader unclear {}", isLeader);
isLeader = amILeader(); // not a no, not a yes, try ask again
}
log.info("Starting to work on the main queue");
try {
ZkStateWriter zkStateWriter = null;
ClusterState clusterState = null;
boolean refreshClusterState = true; // let's refresh in the first iteration
while (!this.isClosed) { // once the SolrServer starts, this loops indefinitely
......
......
try {
while (head != null) { // a request has arrived on the queue
byte[] data = head;
final ZkNodeProps message = ZkNodeProps.load(data);
log.info("processMessage: queueSize: {}, message = {} current state version: {}", stateUpdateQueue.getStats().getQueueLength(), message, clusterState.getZkClusterStateVersion());
// we can batch here because workQueue is our fallback in case a ZK write failed
// take the message and dispatch it to the handler for its operation type
clusterState = processQueueItem(message, clusterState, zkStateWriter, true, new ZkStateWriter.ZkWriteCallback() {
@Override
public void onEnqueue() throws Exception {
workQueue.offer(data);
}
@Override
public void onWrite() throws Exception {
// remove everything from workQueue
while (workQueue.poll() != null);
}
});
......
......
}
}
core API的分发
比如说CREATE请求,下面的processMessage就是根据operation类型把请求分发给对应的mutator处理;最终产生的core级别请求会下发到各个节点,接收方据此创建对应的core。
/**
 * Dispatches a single Overseer queue message to the mutator implementing it.
 *
 * <p>Collection-level actions (CREATE, DELETE, CREATESHARD, ...) are resolved
 * first; if {@code operation} is not a collection action it is interpreted as
 * an internal {@link OverseerAction} (STATE, LEADER, QUIT, ...).
 *
 * @param clusterState current cluster state the commands will be applied to
 * @param message      the queue message carrying the operation's parameters
 * @param operation    operation name taken from the message
 * @return the ZK write command(s) to apply; {@code ZkStateWriter.NO_OP} for
 *         actions (e.g. QUIT) that produce no state write
 * @throws RuntimeException if the operation name is unknown
 */
private List<ZkWriteCommand> processMessage(ClusterState clusterState,
    final ZkNodeProps message, final String operation) {
  CollectionParams.CollectionAction collectionAction = CollectionParams.CollectionAction.get(operation);
  if (collectionAction != null) {
    switch (collectionAction) {
      case CREATE:
        return Collections.singletonList(new ClusterStateMutator(getZkStateReader()).createCollection(clusterState, message));
      case DELETE:
        return Collections.singletonList(new ClusterStateMutator(getZkStateReader()).deleteCollection(clusterState, message));
      case CREATESHARD:
        return Collections.singletonList(new CollectionMutator(getZkStateReader()).createShard(clusterState, message));
      case DELETESHARD:
        return Collections.singletonList(new CollectionMutator(getZkStateReader()).deleteShard(clusterState, message));
      case ADDREPLICA:
        return Collections.singletonList(new SliceMutator(getZkStateReader()).addReplica(clusterState, message));
      case ADDREPLICAPROP:
        return Collections.singletonList(new ReplicaMutator(getZkStateReader()).addReplicaProperty(clusterState, message));
      case DELETEREPLICAPROP:
        return Collections.singletonList(new ReplicaMutator(getZkStateReader()).deleteReplicaProperty(clusterState, message));
      case BALANCESHARDUNIQUE:
        ExclusiveSliceProperty dProp = new ExclusiveSliceProperty(clusterState, message);
        if (dProp.balanceProperty()) {
          String collName = message.getStr(ZkStateReader.COLLECTION_PROP);
          return Collections.singletonList(new ZkWriteCommand(collName, dProp.getDocCollection()));
        }
        break; // nothing to balance: falls through to NO_OP
      case MODIFYCOLLECTION:
        CollectionsHandler.verifyRuleParams(zkController.getCoreContainer(), message.getProperties());
        // use getZkStateReader() for consistency with every other branch
        return Collections.singletonList(new CollectionMutator(getZkStateReader()).modifyCollection(clusterState, message));
      case MIGRATESTATEFORMAT:
        return Collections.singletonList(new ClusterStateMutator(getZkStateReader()).migrateStateFormat(clusterState, message));
      default:
        throw new RuntimeException("unknown operation:" + operation
            + " contents:" + message.getProperties());
    }
  } else {
    // Not a collection action: must be an internal Overseer action.
    OverseerAction overseerAction = OverseerAction.get(operation);
    if (overseerAction == null) {
      throw new RuntimeException("unknown operation:" + operation + " contents:" + message.getProperties());
    }
    switch (overseerAction) {
      case STATE:
        return Collections.singletonList(new ReplicaMutator(getZkStateReader()).setState(clusterState, message));
      case LEADER:
        return Collections.singletonList(new SliceMutator(getZkStateReader()).setShardLeader(clusterState, message));
      case DELETECORE:
        return Collections.singletonList(new SliceMutator(getZkStateReader()).removeReplica(clusterState, message));
      case ADDROUTINGRULE:
        return Collections.singletonList(new SliceMutator(getZkStateReader()).addRoutingRule(clusterState, message));
      case REMOVEROUTINGRULE:
        return Collections.singletonList(new SliceMutator(getZkStateReader()).removeRoutingRule(clusterState, message));
      case UPDATESHARDSTATE:
        return Collections.singletonList(new SliceMutator(getZkStateReader()).updateShardState(clusterState, message));
      case QUIT:
        // QUIT is only honored when addressed to this Overseer instance.
        if (myId.equals(message.get("id"))) {
          log.info("Quit command received {}", LeaderElector.getNodeName(myId));
          overseerCollectionConfigSetProcessor.close();
          close();
        } else {
          log.warn("Overseer received wrong QUIT message {}", message);
        }
        break; // QUIT writes no state: falls through to NO_OP
      case DOWNNODE:
        return new NodeMutator(getZkStateReader()).downNode(clusterState, message);
      default:
        throw new RuntimeException("unknown operation:" + operation + " contents:" + message.getProperties());
    }
  }
  return Collections.singletonList(ZkStateWriter.NO_OP);
}
Collection API的下发
上面介绍的流程是API请求已经写入分布式队列,然后OverSeer去处理,那么分布式队列是怎么写入请求的呢?还是以CREATE请求为例。
创建请求由CollectionsHandler接收,老套路,执行的方法名称还是叫handleRequestBody。
/**
 * Entry point for all Collection API requests (CREATE, RELOAD, ...).
 *
 * <p>Validates that this node runs in SolrCloud mode, resolves the requested
 * action, runs the action's parameter-gathering step, and then forwards the
 * resulting message either to the Overseer collection-processor queue
 * (waiting for a response for synchronous operations) or directly to the
 * cluster-state update queue.
 *
 * @throws SolrException BAD_REQUEST when the container is missing, we are not
 *         in cloud mode, the action is unknown, or no action was supplied
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  // Make sure the cores is enabled
  CoreContainer cores = getCoreContainer(); // every SolrServer has one container managing all of its cores
  if (cores == null) {
    throw new SolrException(ErrorCode.BAD_REQUEST,
        "Core container instance missing");
  }
  // Make sure that the core is ZKAware
  if (!cores.isZooKeeperAware()) {
    throw new SolrException(ErrorCode.BAD_REQUEST,
        "Solr instance is not running in SolrCloud mode.");
  }
  // Pick the action
  SolrParams params = req.getParams();
  String a = params.get(CoreAdminParams.ACTION);
  if (a != null) {
    CollectionAction action = CollectionAction.get(a);
    if (action == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown action: " + a);
    }
    CollectionOperation operation = CollectionOperation.get(action);
    log.info("Invoked Collection Action :{} with params {} and sendToOCPQueue={}", action.toLower(), req.getParamString(), operation.sendToOCPQueue);
    SolrResponse response = null;
    Map<String, Object> props = operation.execute(req, rsp, this);
    String asyncId = req.getParams().get(ASYNC);
    if (props != null) {
      if (asyncId != null) {
        props.put(ASYNC, asyncId);
      }
      props.put(QUEUE_OPERATION, operation.action.toLower());
      ZkNodeProps zkProps = new ZkNodeProps(props);
      if (operation.sendToOCPQueue) {
        // enqueue on the Overseer collection-processor queue; synchronous
        // requests block inside handleResponse until the Overseer replies
        response = handleResponse(operation.action.toLower(), zkProps, rsp, operation.timeOut);
      } else {
        // state-only updates bypass the OCP queue and go straight to the
        // cluster-state update queue
        Overseer.getStateUpdateQueue(coreContainer.getZkController().getZkClient()).offer(Utils.toJSON(props));
      }
      final String collectionName = zkProps.getStr(NAME);
      if (action.equals(CollectionAction.CREATE) && asyncId == null) {
        if (rsp.getException() == null) {
          waitForActiveCollection(collectionName, zkProps, cores, response);
        }
      }
    }
  } else {
    throw new SolrException(ErrorCode.BAD_REQUEST, "action is a required param");
  }
  rsp.setHttpCaching(false); // admin responses must never be cached
}
handleResponse
在这里就可以看到,创建的请求被添加到一个分布式的Queue中,后端的Overseer就会从queue中取出这个请求,进行分发。
// Submits a collection-API message to the Overseer collection queue.
// Async requests (ASYNC key present) return immediately after enqueueing;
// synchronous requests block on the returned QueueEvent (up to `timeout` ms)
// until the Overseer writes a serialized SolrResponse back onto the queue.
private SolrResponse handleResponse(String operation, ZkNodeProps m,
SolrQueryResponse rsp, long timeout) throws KeeperException, InterruptedException {
long time = System.nanoTime();
if (m.containsKey(ASYNC) && m.get(ASYNC) != null) {
String asyncId = m.getStr(ASYNC);
if(asyncId.equals("-1")) {
throw new SolrException(ErrorCode.BAD_REQUEST, "requestid can not be -1. It is reserved for cleanup purposes.");
}
NamedList<String> r = new NamedList<>();
// reject duplicate request ids: the id must not already be completed,
// failed, running, or still waiting in the collection queue
if (coreContainer.getZkController().getOverseerCompletedMap().contains(asyncId) ||
coreContainer.getZkController().getOverseerFailureMap().contains(asyncId) ||
coreContainer.getZkController().getOverseerRunningMap().contains(asyncId) ||
overseerCollectionQueueContains(asyncId)) {
r.add("error", "Task with the same requestid already exists.");
} else {
coreContainer.getZkController().getOverseerCollectionQueue()
.offer(Utils.toJSON(m));
}
r.add(CoreAdminParams.REQUESTID, (String) m.get(ASYNC));
SolrResponse response = new OverseerSolrResponse(r);
rsp.getValues().addAll(response.getResponse());
return response;
}
QueueEvent event = coreContainer.getZkController()
.getOverseerCollectionQueue()
.offer(Utils.toJSON(m), timeout); // this offer() is where the request enters the distributed queue
if (event.getBytes() != null) {
SolrResponse response = SolrResponse.deserialize(event.getBytes()); // blocking wait for the result, bounded by the timeout above
rsp.getValues().addAll(response.getResponse());
// surface any exception the Overseer serialized into the response
SimpleOrderedMap exp = (SimpleOrderedMap) response.getResponse().get("exception");
if (exp != null) {
Integer code = (Integer) exp.get("rspCode");
rsp.setException(new SolrException(code != null && code != -1 ? ErrorCode.getErrorCode(code) : ErrorCode.SERVER_ERROR, (String)exp.get("msg")));
}
return response;
} else {
// no payload came back: distinguish a timeout, a watcher that fired
// without data, and the unexpected fall-through case
if (System.nanoTime() - time >= TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS)) {
throw new SolrException(ErrorCode.SERVER_ERROR, operation
+ " the collection time out:" + timeout / 1000 + "s");
} else if (event.getWatchedEvent() != null) {
throw new SolrException(ErrorCode.SERVER_ERROR, operation
+ " the collection error [Watcher fired on path: "
+ event.getWatchedEvent().getPath() + " state: "
+ event.getWatchedEvent().getState() + " type "
+ event.getWatchedEvent().getType() + "]");
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, operation
+ " the collection unknown case");
}
}
}
总结
这个流程主要就是要了解,zookeeper的node,watcher 以及根据这种机制构建的OverSeer的处理逻辑。利用这些特性,上层实现了solr的两套API。