2021SC@SDUSC
** 提示
源码展示中笔者会将重载的方法省略,只展示关键方法
探索SolrClient
sdudoc的检索功能主要是通过solr实现的,本人确实很好奇solr框架的源码是如何实现的。
首先我们需要在 application.properties 中配置solr的服务器主机地址。
我们使用 solr 主要是通过 SolrClient 类进行操作的,SolrClient 的源码大致如下:
/**
 * Abridged excerpt of the SolrJ client entry point. Overloads and most members are
 * elided ({@code ...}); only the representative update-style operations are shown.
 * Every operation shown here builds an {@code UpdateRequest} (directly or via another
 * overload) and processes it against this client.
 */
public abstract class SolrClient implements Serializable, Closeable {
  // Used to convert plain Java objects (beans) into Solr document objects.
  private DocumentObjectBinder binder;

  /**
   * Adds documents by filling an {@code UpdateRequest}, setting its commit-within value
   * and processing it with this client. Parameters are elided in this excerpt.
   *
   * @return the response produced by processing the update request
   */
  public UpdateResponse add(...) {
    UpdateRequest req = new UpdateRequest();
    ...
    req.setCommitWithin(...);
    return req.process(this, ...);
  }

  /**
   * Converts each bean to a {@code SolrInputDocument} through the binder and adds the
   * resulting documents to the given collection.
   *
   * @param collection     the collection passed on to {@code add}
   * @param beans          the objects to convert and add
   * @param commitWithinMs the commit-within value passed on to {@code add}
   * @return the response of the underlying {@code add} call
   * @throws SolrServerException propagated from {@code add}
   * @throws IOException         propagated from {@code add}
   */
  public UpdateResponse addBeans(String collection, Collection<?> beans, int commitWithinMs)
      throws SolrServerException, IOException {
    DocumentObjectBinder binder = this.getBinder();
    ArrayList<SolrInputDocument> docs = new ArrayList<>(beans.size());
    for (Object bean : beans) {
      docs.add(binder.toSolrInputDocument(bean));
    }
    return add(collection, docs, commitWithinMs);
  }

  /**
   * Sends a commit by processing an {@code UpdateRequest} whose action is
   * {@code ACTION.COMMIT}.
   *
   * @param collection   the collection the request is processed against
   * @param waitFlush    forwarded to {@code setAction}
   * @param waitSearcher forwarded to {@code setAction}
   * @param softCommit   forwarded to {@code setAction}
   * @return the response produced by processing the commit request
   * @throws SolrServerException propagated from {@code process}
   * @throws IOException         propagated from {@code process}
   */
  public UpdateResponse commit(String collection, boolean waitFlush, boolean waitSearcher, boolean softCommit)
      throws SolrServerException, IOException {
    return new UpdateRequest()
        .setAction(UpdateRequest.ACTION.COMMIT, waitFlush, waitSearcher, softCommit)
        .process(this, collection);
  }

  // The remaining operations are likewise carried out through UpdateRequest and are omitted here.
  ...
}
可见,SolrClient 是一个用来包装更新请求 UpdateRequest 的类,便于使用者进行相关的操作。
下面我们来看一下 UpdateRequest 类
可以发现,UpdateRequest有很多成员:
// Documents queued for indexing, each mapped to its per-document options
// (the value is null when a document is added without options).
private Map<SolrInputDocument, Map<String, Object>> documents;
// NOTE(review): not touched by the methods shown in this excerpt; presumably an
// alternative, iterator-based source of documents - confirm against the full class.
private Iterator<SolrInputDocument> docIterator;
// Ids queued for deletion, each mapped to its per-id options (null when there are none).
private Map<String, Map<String, Object>> deleteById;
// Queries queued through deleteByQuery.
private List<String> deleteQuery;
// NOTE(review): not touched by the methods shown in this excerpt; meaning to be confirmed.
private boolean isLastDocInBatch;
并且构造器内容为:
/** Creates an update request that uses METHOD.POST against the "/update" path. */
public UpdateRequest() {
  this("/update");
}

/**
 * Creates an update request that uses METHOD.POST against the given path.
 *
 * @param url the path handed to the superclass constructor
 */
public UpdateRequest(String url) {
  super(METHOD.POST, url);
}
UpdateRequest 的基本操作如下:
// 无参数add
/**
 * Queues a single document for indexing without any per-document options.
 *
 * @param doc the document to queue; must not be null
 * @return this request, so calls can be chained
 * @throws NullPointerException if {@code doc} is null
 */
public UpdateRequest add(final SolrInputDocument doc) {
  Objects.requireNonNull(doc, "Cannot add a null SolrInputDocument");
  // The backing map is only created once the first document arrives.
  final boolean needsInit = (documents == null);
  if (needsInit) {
    documents = new LinkedHashMap<>();
  }
  // A null value records "no options" for this document.
  documents.put(doc, null);
  return this;
}
// 有参数add
/**
 * Queues a single document for indexing together with optional per-document settings.
 * An options map is always stored for the document, even when both settings are null.
 *
 * @param doc          the document to queue; must not be null
 * @param commitWithin stored under COMMIT_WITHIN when non-null
 * @param overwrite    stored under OVERWRITE when non-null
 * @return this request, so calls can be chained
 * @throws NullPointerException if {@code doc} is null
 */
public UpdateRequest add(final SolrInputDocument doc, Integer commitWithin, Boolean overwrite) {
  Objects.requireNonNull(doc, "Cannot add a null SolrInputDocument");

  final Map<String, Object> options = new HashMap<>(2);
  if (commitWithin != null) {
    options.put(COMMIT_WITHIN, commitWithin);
  }
  if (overwrite != null) {
    options.put(OVERWRITE, overwrite);
  }

  if (documents == null) {
    documents = new LinkedHashMap<>();
  }
  documents.put(doc, options);
  return this;
}
// 多个对象add
/**
 * Queues every document of the collection for indexing, without per-document options.
 * Documents are queued one by one, so elements preceding a null element are already
 * queued when the exception is thrown.
 *
 * @param docs the documents to queue; no element may be null
 * @return this request, so calls can be chained
 * @throws NullPointerException if an element of {@code docs} is null
 */
public UpdateRequest add(final Collection<SolrInputDocument> docs) {
  if (documents == null) {
    documents = new LinkedHashMap<>();
  }
  final Iterator<SolrInputDocument> remaining = docs.iterator();
  while (remaining.hasNext()) {
    final SolrInputDocument current = remaining.next();
    Objects.requireNonNull(current, "Cannot add a null SolrInputDocument");
    documents.put(current, null);
  }
  return this;
}
// 按id删除
/**
 * Queues the deletion of the document with the given id, without extra options.
 *
 * @param id the id to queue for deletion
 * @return this request, so calls can be chained
 */
public UpdateRequest deleteById(String id) {
  // The backing map is only created once the first id arrives.
  final boolean needsInit = (deleteById == null);
  if (needsInit) {
    deleteById = new LinkedHashMap<>();
  }
  // A null value records "no options" for this id.
  deleteById.put(id, null);
  return this;
}
// 有参按id删除
/**
 * Queues the deletion of the document with the given id, optionally carrying a route
 * and a version. When both are null, the id is stored with a null options map.
 *
 * @param id      the id to queue for deletion
 * @param route   stored under _ROUTE_ when non-null
 * @param version stored under VER when non-null
 * @return this request, so calls can be chained
 */
public UpdateRequest deleteById(String id, String route, Long version) {
  if (deleteById == null) {
    deleteById = new LinkedHashMap<>();
  }

  Map<String, Object> options = null;
  if (route != null || version != null) {
    options = new HashMap<>(1);
    if (version != null) {
      options.put(VER, version);
    }
    if (route != null) {
      options.put(_ROUTE_, route);
    }
  }

  deleteById.put(id, options);
  return this;
}
// 多重按id删除
/**
 * Queues the deletion of every id in the list, without extra options.
 *
 * @param ids the ids to queue for deletion
 * @return this request, so calls can be chained
 */
public UpdateRequest deleteById(List<String> ids) {
  if (deleteById == null) {
    deleteById = new LinkedHashMap<>();
  }
  final Iterator<String> remaining = ids.iterator();
  while (remaining.hasNext()) {
    // A null value records "no options" for this id.
    deleteById.put(remaining.next(), null);
  }
  return this;
}
// 将查询的结果删除
/**
 * Queues a delete-by-query entry.
 *
 * @param q the query string to append to the list of delete queries
 * @return this request, so calls can be chained
 */
public UpdateRequest deleteByQuery(String q) {
  // The backing list is only created once the first query arrives.
  final boolean needsInit = (deleteQuery == null);
  if (needsInit) {
    deleteQuery = new ArrayList<>();
  }
  deleteQuery.add(q);
  return this;
}
// ...
// 提交
/**
 * Flags this request as a commit and processes it right away.
 *
 * @param client     the client used to process the request
 * @param collection the collection the request is processed against
 * @return the response produced by {@code process}
 * @throws IOException         propagated from {@code process}
 * @throws SolrServerException propagated from {@code process}
 */
public UpdateResponse commit(SolrClient client, String collection) throws IOException, SolrServerException {
  // Request parameters are created lazily.
  if (params == null) {
    params = new ModifiableSolrParams();
  }
  params.set(UpdateParams.COMMIT, "true");

  final UpdateResponse response = process(client, collection);
  return response;
}
// ...
// 剩下的是一些额外操作,比如生成路由,转换为XML格式
观察 UpdateRequest 对象的操作,我们可以发现
上面的成员是用来记录操作的键值对和参数
存储的对象是 SolrInputDocument
UpdateRequest 类中并没有定义 process() 或 setAction() 等方法,说明这些方法定义在它的父类中。
观察 AbstractUpdateRequest,其构造器:
/**
 * Abridged excerpt of the common base of update requests: it holds the request
 * parameters and forwards the HTTP method and path to {@code SolrRequest}.
 */
public abstract class AbstractUpdateRequest extends SolrRequest<UpdateResponse> implements IsUpdateRequest {

  /** The kinds of action a request can be flagged with via {@code setAction}. */
  public enum ACTION {
    COMMIT,
    OPTIMIZE
  }

  // Request parameters; stays null until a method of this class first needs it.
  protected ModifiableSolrParams params;

  // NOTE(review): -1 presumably means "no commit-within configured" - confirm.
  protected int commitWithin = -1;

  /**
   * Passes the method and path straight to the superclass constructor.
   *
   * @param m    the request method
   * @param path the request path
   */
  public AbstractUpdateRequest(METHOD m, String path) {
    super(m, path);
  }
}
发现参数继续向上传递。
这里我们可以发现 setAction 方法:
/**
 * Flags this request as a commit or an optimize and records the related parameters.
 * WAIT_SEARCHER is always recorded, whatever the action is.
 *
 * @param action       COMMIT sets COMMIT and SOFT_COMMIT; OPTIMIZE sets OPTIMIZE and
 *                     MAX_OPTIMIZE_SEGMENTS; any other value sets neither group
 * @param waitFlush    not read by this method
 * @param waitSearcher recorded under WAIT_SEARCHER
 * @param softCommit   recorded under SOFT_COMMIT for a commit
 * @param maxSegments  recorded under MAX_OPTIMIZE_SEGMENTS for an optimize
 * @return this request, so calls can be chained
 */
public AbstractUpdateRequest setAction(ACTION action, boolean waitFlush, boolean waitSearcher, boolean softCommit, int maxSegments) {
  // Request parameters are created lazily.
  if (params == null) {
    params = new ModifiableSolrParams();
  }

  if (action == ACTION.COMMIT) {
    params.set(UpdateParams.COMMIT, "true");
    params.set(UpdateParams.SOFT_COMMIT, Boolean.toString(softCommit));
  } else if (action == ACTION.OPTIMIZE) {
    params.set(UpdateParams.OPTIMIZE, "true");
    params.set(UpdateParams.MAX_OPTIMIZE_SEGMENTS, maxSegments);
  }

  params.set(UpdateParams.WAIT_SEARCHER, Boolean.toString(waitSearcher));
  return this;
}
首先,通过观察源码可知 ModifiableSolrParams 是一个封装好的Map对象,键值是<String, String[]>
这个方法主要就是确定我这一次 request 的请求类型是 COMMIT (提交)还是OPTIMIZE (优化) ,并将相关参数加入到 params 中
此外,还有两个方法:
/**
 * Flags this request as a rollback by setting the ROLLBACK parameter to "true".
 *
 * @return this request, so calls can be chained
 */
public AbstractUpdateRequest rollback() {
  // Request parameters are created lazily.
  final boolean needsInit = (params == null);
  if (needsInit) {
    params = new ModifiableSolrParams();
  }
  params.set(UpdateParams.ROLLBACK, "true");
  return this;
}
/**
 * Sets one request parameter, creating the parameter container on first use.
 *
 * @param param the parameter name
 * @param value the parameter value
 */
public void setParam(String param, String value) {
  final boolean needsInit = (params == null);
  if (needsInit) {
    params = new ModifiableSolrParams();
  }
  params.set(param, value);
}
一个是指出我这次 request 是一次回滚,另外一个是对于提供参数的操作的封装。
看来 process 方法在 SolrRequest 中
观察 SolrRequest :
/**
 * Abridged excerpt of the base request type; only {@code process} is shown.
 *
 * @param <T> the response type produced by this request
 */
public abstract class SolrRequest<T extends SolrResponse> implements Serializable {

  /**
   * Sends this request through the client and wraps the raw result in a response
   * object, recording how long the call took.
   *
   * @param client     the client that performs the request
   * @param collection the collection the request is sent to
   * @return the response, populated with the raw result and the elapsed time in milliseconds
   * @throws SolrServerException propagated from the client
   * @throws IOException         propagated from the client
   */
  public final T process(SolrClient client, String collection) throws SolrServerException, IOException {
    final long begin = System.nanoTime();

    final T response = createResponse(client);
    response.setResponse(client.request(this, collection));

    // The measured span covers both creating the response and performing the request.
    final long elapsedNanos = System.nanoTime() - begin;
    response.setElapsedTime(TimeUnit.NANOSECONDS.toMillis(elapsedNanos));
    return response;
  }
}
其中 createResponse 由其子类实现,返回一个UpdateResponse,是一个存放结果的数据结构,并且 request 方法也是由 SolrClient 的实现类实现。
寻找 SolrClient 的实现类,我们注意到:
/**
 * Abridged excerpt of an {@code HttpSolrClient} variant that reserves the
 * "delegation" parameter: requests may not carry it themselves, and it is always
 * part of the configured query parameters.
 */
public class DelegationTokenHttpSolrClient extends HttpSolrClient {
  public static final String DELEGATION_TOKEN_PARAM = "delegation";
  ...

  /**
   * Rejects requests that already carry the delegation parameter, then defers to the
   * superclass implementation.
   *
   * @throws IllegalArgumentException if the request parameters contain the delegation parameter
   */
  @Override
  protected HttpRequestBase createMethod(final SolrRequest request, String collection) throws IOException, SolrServerException {
    final SolrParams requestParams = request.getParams();
    final boolean carriesToken =
        requestParams != null && requestParams.getParams(DELEGATION_TOKEN_PARAM) != null;
    if (carriesToken) {
      throw new IllegalArgumentException(DELEGATION_TOKEN_PARAM + " parameter not supported");
    }
    return super.createMethod(request, collection);
  }

  /**
   * Hands the query parameter names to the superclass, making sure the delegation
   * parameter is among them. A set that already contains it is passed through as is;
   * otherwise an unmodifiable copy with the parameter added is passed (a null argument
   * is treated as an empty set).
   *
   * @param queryParams the requested query parameter names; may be null
   */
  @Override
  public void setQueryParams(Set<String> queryParams) {
    final Set<String> effective;
    if (queryParams != null && queryParams.contains(DELEGATION_TOKEN_PARAM)) {
      effective = queryParams;
    } else {
      final Set<String> withToken =
          (queryParams == null) ? new HashSet<String>() : new HashSet<String>(queryParams);
      withToken.add(DELEGATION_TOKEN_PARAM);
      effective = Collections.unmodifiableSet(withToken);
    }
    super.setQueryParams(effective);
  }
}
实现类中实现了 createMethod 和 setQueryParams
而它的父类 HttpSolrClient 实现了 request 方法:
public class HttpSolrClient extends BaseHttpSolrClient {
/**
 * Builds the HTTP method for the request, applies the basic-auth header and any
 * request-specific headers, then executes it.
 *
 * @param request    the request to send
 * @param processor  the parser handed to {@code executeMethod}
 * @param collection the collection handed to {@code createMethod}
 * @return the result of {@code executeMethod}
 * @throws SolrServerException propagated from {@code createMethod} / {@code executeMethod}
 * @throws IOException         propagated from {@code createMethod}
 */
public NamedList<Object> request(final SolrRequest request, final ResponseParser processor, String collection)
    throws SolrServerException, IOException {
  final HttpRequestBase httpMethod = createMethod(request, collection);
  setBasicAuthHeader(request, httpMethod);

  // Copy request-specific headers, if any, onto the outgoing HTTP method.
  if (request.getHeaders() != null) {
    final Map<String, String> extraHeaders = request.getHeaders();
    extraHeaders.forEach(httpMethod::setHeader);
  }

  return executeMethod(httpMethod, request.getUserPrincipal(), processor, isV2ApiRequest(request));
}
// ...
/**
 * Executes a prepared HTTP method and turns the HTTP response into a {@code NamedList}.
 *
 * <p>Steps, in order: add the User-Agent header; apply the socket timeout, connect timeout
 * and redirect settings that are non-null; execute the call; check the HTTP status; then
 * either return the raw stream (no processor, or an {@code InputStreamResponseParser}) or
 * check the MIME type and parse the body with the processor. Error information found in
 * the parsed body or implied by a non-OK status is converted into an exception.
 *
 * @param method        the HTTP method to execute
 * @param userPrincipal when non-null, set as the user token of the request context
 * @param processor     parser for the response body; null means "return the raw stream"
 * @param isV2Api       changes how error responses are converted (see the checks after parsing)
 * @return the parsed response, or a list holding "stream" and "closeableResponse" when the
 *         raw stream is returned (in that case the entity is not consumed here)
 * @throws SolrServerException on a refused connection, a socket timeout, any other
 *         IOException, or a redirect status while redirects are not followed;
 *         RemoteSolrException / RemoteExecutionException are thrown for error responses
 */
protected NamedList<Object> executeMethod(HttpRequestBase method, Principal userPrincipal, final ResponseParser processor, final boolean isV2Api) throws SolrServerException {
method.addHeader("User-Agent", AGENT);
// Start from the default request config and override only the settings that are non-null.
org.apache.http.client.config.RequestConfig.Builder requestConfigBuilder = HttpClientUtil.createDefaultRequestConfigBuilder();
if (soTimeout != null) {
requestConfigBuilder.setSocketTimeout(soTimeout);
}
if (connectionTimeout != null) {
requestConfigBuilder.setConnectTimeout(connectionTimeout);
}
if (followRedirects != null) {
requestConfigBuilder.setRedirectsEnabled(followRedirects);
}
method.setConfig(requestConfigBuilder.build());
HttpEntity entity = null;
InputStream respBody = null;
// Controls whether the finally block consumes the entity; cleared only when the raw stream is returned.
boolean shouldClose = true;
try {
// Execute the method.
HttpClientContext httpClientRequestContext = HttpClientUtil.createNewHttpClientRequestContext();
if (userPrincipal != null) {
// Normally the context contains a static userToken to enable reuse resources.
// However, if a personal Principal object exists, we use that instead, also as a means
// to transfer authentication information to Auth plugins that wish to intercept the request later
httpClientRequestContext.setUserToken(userPrincipal);
}
final HttpResponse response = httpClient.execute(method, httpClientRequestContext);
int httpStatus = response.getStatusLine().getStatusCode();
// Read the contents
entity = response.getEntity();
// NOTE(review): entity is dereferenced without a null check; presumably the server always sends a body - confirm.
respBody = entity.getContent();
Header ctHeader = response.getLastHeader("content-type");
String contentType;
if (ctHeader != null) {
contentType = ctHeader.getValue();
} else {
contentType = "";
}
// handle some http level checks before trying to parse the response
switch (httpStatus) {
case HttpStatus.SC_OK:
case HttpStatus.SC_BAD_REQUEST:
case HttpStatus.SC_CONFLICT: // 409
break;
case HttpStatus.SC_MOVED_PERMANENTLY:
case HttpStatus.SC_MOVED_TEMPORARILY:
// NOTE(review): followRedirects is null-checked above but unboxed here; a null value would throw - confirm it cannot be null.
if (!followRedirects) {
throw new SolrServerException("Server at " + getBaseURL()
+ " sent back a redirect (" + httpStatus + ").");
}
break;
default:
// Any other status is only tolerated when there is a processor and a content type to parse.
if (processor == null || "".equals(contentType)) {
throw new RemoteSolrException(baseUrl, httpStatus, "non ok status: " + httpStatus
+ ", message:" + response.getStatusLine().getReasonPhrase(),
null);
}
}
if (processor == null || processor instanceof InputStreamResponseParser) {
// no processor specified, return raw stream
NamedList<Object> rsp = new NamedList<>();
rsp.add("stream", respBody);
rsp.add("closeableResponse", response);
// Only case where stream should not be closed
shouldClose = false;
return rsp;
}
// Compare the MIME type the processor expects with the one the server actually sent.
String procCt = processor.getContentType();
if (procCt != null) {
String procMimeType = ContentType.parse(procCt).getMimeType().trim().toLowerCase(Locale.ROOT);
String mimeType = ContentType.parse(contentType).getMimeType().trim().toLowerCase(Locale.ROOT);
if (!procMimeType.equals(mimeType)) {
// unexpected mime type
String msg = "Expected mime type " + procMimeType + " but got " + mimeType + ".";
Header encodingHeader = response.getEntity().getContentEncoding();
String encoding;
if (encodingHeader != null) {
encoding = encodingHeader.getValue();
} else {
encoding = "UTF-8"; // try UTF-8
}
// Append the response body to the message so the caller can see what was returned.
try {
msg = msg + " " + IOUtils.toString(respBody, encoding);
} catch (IOException e) {
throw new RemoteSolrException(baseUrl, httpStatus, "Could not parse response with encoding " + encoding, e);
}
throw new RemoteSolrException(baseUrl, httpStatus, msg, null);
}
}
NamedList<Object> rsp = null;
String charset = EntityUtils.getContentCharSet(response.getEntity());
try {
rsp = processor.processResponse(respBody, charset);
} catch (Exception e) {
throw new RemoteSolrException(baseUrl, httpStatus, e.getMessage(), e);
}
// An "error" entry becomes a RemoteExecutionException for V2 requests, or when the value
// found under errPath ends with "ExceptionWithErrObject".
Object error = rsp == null ? null : rsp.get("error");
if (error != null && (isV2Api || String.valueOf(getObjectByPath(error, true, errPath)).endsWith("ExceptionWithErrObject"))) {
throw RemoteExecutionException.create(baseUrl, rsp);
}
if (httpStatus != HttpStatus.SC_OK && !isV2Api) {
NamedList<String> metadata = null;
String reason = null;
// Best-effort extraction of the error details: any failure here (including a null rsp)
// is deliberately ignored and the fallback reason below is used instead.
try {
NamedList err = (NamedList) rsp.get("error");
if (err != null) {
reason = (String) err.get("msg");
if(reason == null) {
reason = (String) err.get("trace");
}
metadata = (NamedList<String>)err.get("metadata");
}
} catch (Exception ex) {}
if (reason == null) {
// Fallback reason: the HTTP reason phrase followed by the request URI.
StringBuilder msg = new StringBuilder();
msg.append(response.getStatusLine().getReasonPhrase())
.append("\n\n")
.append("request: ")
.append(method.getURI());
reason = java.net.URLDecoder.decode(msg.toString(), UTF_8);
}
RemoteSolrException rss = new RemoteSolrException(baseUrl, httpStatus, reason, null);
if (metadata != null) rss.setMetadata(metadata);
throw rss;
}
return rsp;
} catch (ConnectException e) {
throw new SolrServerException("Server refused connection at: "
+ getBaseURL(), e);
} catch (SocketTimeoutException e) {
throw new SolrServerException(
"Timeout occurred while waiting response from server at: "
+ getBaseURL(), e);
} catch (IOException e) {
throw new SolrServerException(
"IOException occurred when talking to server at: " + getBaseURL(), e);
} finally {
// Consume the entity on every path except the raw-stream return above.
if (shouldClose) {
Utils.consumeFully(entity);
}
}
}
}
封装了网络请求
所以使用 Solr 需要先在服务器端部署 Solr 服务,然后在客户端指定服务器的 URL;SolrClient 会通过 HTTP 请求访问该服务器,从而读写持久化的数据。