Solrj is a fairly basic client library for the Solr search server. It makes it very convenient to interact with Solr, and its most fundamental capability is managing the Solr index, including adding, updating, deleting, and querying documents. For relatively simple applications, Solrj is usually sufficient, and you can very easily use its API to talk to the Solr server and perform basic index-management tasks. If your application is more complex, you can extend Solrj to meet your needs.
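To give a feel for the API before looking at the full tool class, here is a minimal sketch of basic Solrj interaction: add a document, commit, query, and delete. It assumes the same Solrj 1.x/3.x-era client (CommonsHttpSolrServer) used throughout this article; the URL, class name, and field names are placeholders rather than anything from the original code.

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;

public class SolrjQuickStart {
    public static void main(String[] args) throws Exception {
        // Point the client at a running Solr core (placeholder URL).
        CommonsHttpSolrServer server = new CommonsHttpSolrServer("http://localhost:8080/solr/core0");

        // Add (or update, if the unique key already exists) a document.
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "demo-1");
        doc.addField("title", "Hello Solrj");
        server.add(doc);
        server.commit();

        // Query the index and print the matching documents.
        for (SolrDocument d : server.query(new SolrQuery("title:Hello")).getResults()) {
            System.out.println(d.getFieldValue("id") + " -> " + d.getFieldValue("title"));
        }

        // Delete by id and make the deletion visible.
        server.deleteById("demo-1");
        server.commit();
    }
}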
Below is a utility class, SolrPostServer, that uses the Solrj API to interact with a Solr server and supports adding, updating, deleting, and querying index documents. The two inner classes of SolrPostServer provide the configuration for, and a helper for accessing, MongoDB.
In practice, there are two ways to decide when commits should happen:
- One is to count on the client side: while indexing, track how many documents have been added and issue a manual commit whenever the configured threshold is reached. This is flexible, since you can pick a reasonable commit batch size based on how the search server is doing (the add() method of the class below works this way).
- The other is to configure it on the Solr server itself, in solrconfig.xml. Large batch updates are generally not run while the search server is busy serving traffic, so letting the server commit automatically simplifies index management and lets document updates be fully automated, for example:
<updateHandler class="solr.DirectUpdateHandler2">
  <maxPendingDeletes>10000</maxPendingDeletes>
  <autoCommit>
    <maxDocs>20</maxDocs>
    <maxTime>86000</maxTime>
  </autoCommit>
</updateHandler>
In the autoCommit block above, maxDocs specifies after how many pending documents a commit is executed, while maxTime specifies the maximum interval (in milliseconds) between commits; normally configuring just one of the two is enough. Each commit makes the most recent updates visible, but if the next commit point arrives before the previous commit has finished, indexing throughput suffers badly.
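For the first, client-side approach, the idea is simply to buffer documents and commit once a counter reaches a threshold; the SolrPostServer class below integrates this logic into its add() and finallyCommit() methods. A minimal standalone sketch, under the same Solrj API assumptions as above (the class and method names here are illustrative only):

import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

public class ClientSideCommitSketch {
    private static final int MAX_COMMIT_COUNT = 100; // tune to the server's load

    public static void index(CommonsHttpSolrServer server, List<SolrInputDocument> docs) throws Exception {
        List<SolrInputDocument> buffer = new ArrayList<SolrInputDocument>();
        for (SolrInputDocument doc : docs) {
            buffer.add(doc);
            // Commit manually every MAX_COMMIT_COUNT documents.
            if (buffer.size() >= MAX_COMMIT_COUNT) {
                server.add(buffer);
                server.commit(false, false); // don't wait for flush or a new searcher
                buffer.clear();
            }
        }
        // Flush and commit whatever is left over.
        if (!buffer.isEmpty()) {
            server.add(buffer);
            server.commit(false, false);
        }
    }
}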
The full implementation is shown below:
package org.shirdrn.solr.solrj;
import java.io.IOException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.httpclient.HttpClient;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
/**
* Solr client for index operations (add, update, delete and query).
*
* @author shirdrn
* @date 2011-12-20
*/
public class SolrPostServer {
private static final Logger LOG = Logger.getLogger(SolrPostServer.class);
private CommonsHttpSolrServer server;
private ResponseParser responseParser;
private MongoConfig mongoConfig;
private String[] collectionNames;
private int maxCommitCount = 100;
private boolean manualOptimize = true;
private boolean manualCommit = false;
private Collection<SolrInputDocument> docContainer = new ArrayList<SolrInputDocument>();
private static int totalCount = 0;
public SolrPostServer(String url, HttpClient httpClient, MongoConfig mongoConfig) {
try {
if(httpClient==null) {
server = new CommonsHttpSolrServer(url);
server.setSoTimeout(500000); // socket read timeout
server.setConnectionTimeout(5000);
server.setDefaultMaxConnectionsPerHost(10);
server.setMaxTotalConnections(100);
server.setAllowCompression(true);
server.setMaxRetries(1); // defaults to 0. > 1 not recommended.
} else {
server = new CommonsHttpSolrServer(url, httpClient);
}
} catch (MalformedURLException e) {
e.printStackTrace();
}
this.mongoConfig = mongoConfig;
initialize();
}
/**
* Initialize the {@link CommonsHttpSolrServer}'s basic parameters.
*/
private void initialize() {
if(responseParser!=null) {
server.setParser(responseParser);
} else {
server.setParser(new XMLResponseParser());
}
}
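/**
* Pull records from the configured MongoDB collections, convert each record into a
* {@link SolrInputDocument} and post it to Solr, either document by document or in
* client-committed batches, then flush any remaining batch and optionally optimize.
*/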
@SuppressWarnings("unchecked")
public void postUpdate() {
DBCursor cursor = null;
try {
for (String c : collectionNames) {
LOG.info("MongoDB collection name: " + c);
DBCollection collection = MongoHelper.newHelper(mongoConfig).getCollection(c);
DBObject q = new BasicDBObject();
cursor = collection.find(q);
while(cursor.hasNext()) {
try {
Map<Object, Object> m = cursor.next().toMap();
if(manualCommit) {
add(m, true);
} else {
add(m, false);
}
++totalCount;
LOG.info("Add fragment: _id = " + m.get("_id").toString());
} catch (IOException e) {
e.printStackTrace();
}
}
cursor.close();
}
LOG.info("Add totalCount: " + totalCount);
finallyCommit();
optimize(manualOptimize);
} catch (MongoException e) {
e.printStackTrace();
} catch (SolrServerException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Delete lucene {@link Document}s by IDs.
* @param strings
*/
public void deleteById(List<String> strings) {
try {
server.deleteById(strings);
} catch (SolrServerException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Delete lucene {@link Document}s by query.
* @param query
*/
public void deleteByQuery(String query) {
try {
server.deleteByQuery(query);
} catch (SolrServerException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Query.
* @param params
* @param fields
* @return
*/
public List<Map<String, Object>> query(SolrParams params, String[] fields) {
List<Map<String, Object>> results = new ArrayList<Map<String, Object>>();
try {
SolrDocumentList documents = server.query(params).getResults();
Iterator<SolrDocument> iter = documents.iterator();
while(iter.hasNext()) {
SolrDocument doc = iter.next();
Map<String, Object> map = new HashMap<String, Object>();
for(String field : fields) {
map.put(field, doc.getFieldValue(field));
}
results.add(map);
}
} catch (SolrServerException e) {
e.printStackTrace();
}
return results;
}
/**
* When committing is controlled on the client side, flush any buffered documents and issue a final commit.
* @throws SolrServerException
* @throws IOException
*/
private void finallyCommit() throws SolrServerException, IOException {
if(!docContainer.isEmpty()) {
server.add(docContainer);
commit(false, false);
}
}
/**
* Commit.
* @param waitFlush
* @param waitSearcher
*/
public void commit(boolean waitFlush, boolean waitSearcher) {
try {
server.commit(waitFlush, waitSearcher);
} catch (SolrServerException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* When optimizing is controlled on the client side, execute the optimize and commit, rolling back on failure.
* @param waitFlush
* @param waitSearcher
*/
public void optimize(boolean waitFlush, boolean waitSearcher) {
try {
server.optimize(waitFlush, waitSearcher);
commit(waitFlush, waitSearcher);
} catch (Exception e) {
LOG.error("Encounter error when optimizing.", e);
try {
server.rollback();
} catch (SolrServerException e1) {
e1.printStackTrace();
} catch (IOException e1) {
e1.printStackTrace();
}
}
}
/**
* Optimize the index if the optimize flag is set.
* @param optimize
*/
private void optimize(boolean optimize) {
if(optimize) {
optimize(true, true);
}
}
/**
* Add a {@link SolrInputDocument} directly, or collect it into a container for batch updating.
* The document is built from a MongoDB record represented as a Map object.
* @param m
* @param oneByOne
* @throws SolrServerException
* @throws IOException
*/
private void add(Map<Object, Object> m, boolean oneByOne) throws SolrServerException, IOException {
SolrInputDocument doc = createDocument(m);
if(oneByOne) {
server.add(doc);
} else {
docContainer.add(doc);
if(docContainer.size()>maxCommitCount) {
server.add(docContainer);
server.commit(false, false);
docContainer = new ArrayList<SolrInputDocument>();
}
}
}
/**
* Create a {@link SolrInputDocument} object.
* @param record
* @return
*/
private SolrInputDocument createDocument(Map<Object, Object> record) {
String id = record.get("_id").toString();
String articleId = (String) record.get("articleId");
String title = (String) record.get("title");
String url = (String) record.get("url");
String spiderName = (String) record.get("spiderName");
String fragment = makeFragment((BasicDBObject) record.get("fragment"));
String word = (String) record.get("word");
int pictureCount = (Integer) record.get("pictureCount");
int selectedCount = (Integer) record.get("selectedCount");
int fragmentSize = (Integer) record.get("fragmentSize");
SolrInputDocument doc = new SolrInputDocument();
doc.addField("_id", id, 1.0f);
doc.addField("articleId", articleId, 1.0f);
doc.addField("title", title, 1.0f);
doc.addField("url", url, 1.0f);
doc.addField("spiderName", spiderName, 1.0f);
doc.addField("fragment", fragment, 1.0f);
doc.addField("word", word, 1.0f);
// Additional processing for lucene payload metadata.
doc.addField("pictureCount", word + "|" + pictureCount);
doc.addField("coverage", word + "|" + (float)selectedCount/fragmentSize);
return doc;
}
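/**
* Concatenate the values of the fragment sub-document into a single string,
* separating the pieces with {@code <br>} tags.
*/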
@SuppressWarnings("unchecked")
private String makeFragment(BasicDBObject fragment) {
StringBuilder builder = new StringBuilder();
Iterator<Map.Entry<Integer, String>> iter = fragment.toMap().entrySet().iterator();
while(iter.hasNext()) {
Map.Entry<Integer, String> entry = iter.next();
builder.append(entry.getValue().trim()).append("<br>");
}
return builder.toString();
}
/**
* Set {@link ResponseParser}, default value is {@link XMLResponseParser}.
* @param responseParser
*/
public void setResponseParser(ResponseParser responseParser) {
this.responseParser = responseParser;
}
/**
* Pull document resources from multiple MongoDB collections.
* @param collectionNames
*/
public void setCollectionNames(String[] collectionNames) {
this.collectionNames = collectionNames;
}
public void setMaxCommitCount(int maxCommitCount) {
this.maxCommitCount = maxCommitCount;
}
public void setManualCommit(boolean manualCommit) {
this.manualCommit = manualCommit;
}
public void setManualOptimize(boolean manualOptimize) {
this.manualOptimize = manualOptimize;
}
/**
* Mongo database configuration.
*
* @author shirdrn
* @date 2011-12-20
*/
public static class MongoConfig implements Serializable {
private static final long serialVersionUID = -3028092758346115702L;
private String host;
private int port;
private String dbname;
private String collectionName;
public MongoConfig(String host, int port, String dbname, String collectionName) {
super();
this.host = host;
this.port = port;
this.dbname = dbname;
this.collectionName = collectionName;
}
@Override
public boolean equals(Object obj) {
MongoConfig other = (MongoConfig) obj;
return host.equals(other.host) && port==other.port
&& dbname.equals(other.dbname) && collectionName.equals(other.collectionName);
}
}
/**
* Mongo database utility.
*
* @author shirdrn
* @date 2011-12-20
*/
static class MongoHelper {
private static Mongo mongo;
private static MongoHelper helper;
private MongoConfig mongoConfig;
private MongoHelper(MongoConfig mongoConfig) {
super();
this.mongoConfig = mongoConfig;
}
public synchronized static MongoHelper newHelper(MongoConfig mongoConfig) {
try {
if(helper==null) {
helper = new MongoHelper(mongoConfig);
mongo = new Mongo(mongoConfig.host, mongoConfig.port);
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
if(mongo!=null) {
mongo.close();
}
}
});
}
} catch (Exception e) {
e.printStackTrace();
}
return helper;
}
public DBCollection getCollection(String collectionName) {
DBCollection c = null;
try {
c = mongo.getDB(mongoConfig.dbname).getCollection(collectionName);
} catch (Exception e) {
e.printStackTrace();
}
return c;
}
}
}
Now we can verify this with a test case.
My Solr search server is already deployed and running, at the URL http://192.168.0.197:8080/server/fragment/. The test case is as follows:
package org.shirdrn.solr.solrj;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import junit.framework.TestCase;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.MapSolrParams;
import org.shirdrn.solr.solrj.SolrPostServer.MongoConfig;
@SuppressWarnings("deprecation")
public class TestSolrPostServer extends TestCase {
SolrPostServer myServer;
MongoConfig config;
String url;
String[] collectionNames;
@Override
protected void setUp() throws Exception {
url = "http://192.168.0.197:8080/server/fragment/";
config = new MongoConfig("192.168.0.184", 27017, "fragment", "");
myServer = new SolrPostServer(url, null, config);
myServer.setMaxCommitCount(100);
}
@Override
protected void tearDown() throws Exception {
super.tearDown();
}
public void testPostUpdate() {
collectionNames = new String[] {
"sina",
"lvping",
"daodao",
"go2eu",
"mafengwo",
"lotour",
"17u",
"sohu",
"baseSe",
"bytravel"
};
myServer.setCollectionNames(collectionNames);
myServer.setManualCommit(true);
myServer.setManualOptimize(false);
myServer.postUpdate();
}
public void testPostDelete() {
List<String> strings = new ArrayList<String>();
strings.add("4ef051342c4117a38f63ee97");
strings.add("4ef051322c4117a38f63ee36");
strings.add("4ef051a42c4117a38f63fb51");
strings.add("4ef050d92c4117a38f63dda4");
strings.add("4ef051fe2c4117a38f640bc8");
strings.add("4ef048ef2c4117a38f6207ce");
strings.add("4ef049062c4117a38f620e13");
strings.add("4ef046f12c4117a38f6185c0");
myServer.deleteById(strings);
myServer.commit(false, false);
myServer.optimize(true, false);
}
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testQuery() {
Map map = new HashMap();
map.put(CommonParams.Q, "法国");
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, "10");
map.put(CommonParams.FQ, "word:卢浮宫");
SolrParams params = new MapSolrParams(map);
List<Map<String, Object>> results = myServer.query(params, new String[] {"_id", "title", "url"});
assertEquals(10, results.size());
}
}
In actual development, the Solrj client makes it very easy to perform basic operations for testing, such as building an index, experimenting with Solr's core parameters, and developing or customizing Solr-related features (retrieval, highlighting, faceted search, clustering, and so on).
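As a final illustration of that kind of experimentation, here is a rough sketch of a query that turns on highlighting and faceting through Solrj's SolrQuery API. The field names (word, fragment) follow the schema used in this article, but the class itself and the highlight/facet settings are illustrative and are not part of the SolrPostServer code above.

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;

public class FacetHighlightSketch {
    public static void main(String[] args) throws Exception {
        CommonsHttpSolrServer server =
                new CommonsHttpSolrServer("http://192.168.0.197:8080/server/fragment/");

        SolrQuery query = new SolrQuery("法国");
        query.setStart(0).setRows(10);

        // Highlight matches in the fragment field.
        query.setHighlight(true);
        query.addHighlightField("fragment");
        query.setHighlightSimplePre("<em>");
        query.setHighlightSimplePost("</em>");

        // Facet counts on the word field.
        query.setFacet(true);
        query.addFacetField("word");

        QueryResponse response = server.query(query);
        for (FacetField facet : response.getFacetFields()) {
            System.out.println(facet.getName() + ": " + facet.getValues());
        }
        System.out.println("Highlighting: " + response.getHighlighting());
    }
}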