/**
 * Fetches documents similar to the given one using Solr's MoreLikeThis (mlt) parameters.
 *
 * Queries for "id:" + id with mlt enabled on the "title" field, walks every document
 * list under the "moreLikeThis" section of the response, and keeps the hits whose
 * score is greater than 1. Any exception is logged and the list built so far is returned.
 *
 * @param id    value matched against the "id" field to select the source document
 * @param mindf value sent as the "mlt.mindf" parameter
 * @param mintf value sent as the "mlt.mintf" parameter
 * @param count value sent as the "mlt.count" parameter
 * @return the matching similar documents (id, title, url); empty when nothing qualifies
 *         or when the query fails
 */
public List<LableGroup> getRelated(String id, int mindf, int mintf, int count) {
List<LableGroup> lableGroups = new ArrayList<LableGroup>();
SolrQuery solrQuery = new SolrQuery();
try {
// Select the source document by id and ask Solr for similar documents based on "title".
solrQuery.setQuery("id:" + id)
.setParam("fl", "id,title,url,score")
.setParam("mlt", "true")
.setParam("mlt.fl", "title")
.setParam("mlt.mindf", String.valueOf(mindf))
.setParam("mlt.mintf", String.valueOf(mintf))
.setParam("mlt.count", String.valueOf(count));
QueryResponse response = server.query(solrQuery);
if (null == response) return lableGroups;
// The "moreLikeThis" section is read as a map of document lists; the cast is unchecked.
@SuppressWarnings("unchecked")
SimpleOrderedMap<SolrDocumentList> solrDocumentLists = (SimpleOrderedMap<SolrDocumentList>) response.getResponse().get("moreLikeThis");
for (int i=0; i<solrDocumentLists.size(); i++) {
SolrDocumentList solrDocumentList = solrDocumentLists.getVal(i);
for (SolrDocument doc : solrDocumentList) {
// NOTE(review): each toString() below throws NullPointerException if the field is
// absent from the hit; the catch at the end then drops all remaining hits.
String doc_id = doc.getFieldValue("id").toString();
String doc_title = doc.getFieldValue("title").toString();
String doc_url = doc.getFieldValue("url").toString();
String doc_score = doc.getFieldValue("score").toString();
// Similarity check: keep only hits whose score is greater than 1.
if (Double.parseDouble(doc_score) > 1) {
LableGroup lableGroup = new LableGroup();
lableGroup.setId(doc_id);
lableGroup.setTitle(doc_title);
lableGroup.setUrl(doc_url);
lableGroups.add(lableGroup);
}
}
}
} catch (Exception e) {
// Best effort: log the failure and fall through to return what was collected.
log.error("获取相似性数据error", e);
}
return lableGroups;
// NOTE(review): the closing brace of this method is missing here; the text that follows
// repeats the Javadoc body and the whole method.
* 获取相似性数据
* @param id
* @param mindf
* @param mintf
* @param count
* @return
*/
public List<LableGroup> getRelated(String id, int mindf, int mintf, int count) {
List<LableGroup> lableGroups = new ArrayList<LableGroup>();
SolrQuery solrQuery = new SolrQuery();
try {
solrQuery.setQuery("id:" + id)
.setParam("fl", "id,title,url,score")
.setParam("mlt", "true")
.setParam("mlt.fl", "title")
.setParam("mlt.mindf", String.valueOf(mindf))
.setParam("mlt.mintf", String.valueOf(mintf))
.setParam("mlt.count", String.valueOf(count));
QueryResponse response = server.query(solrQuery);
if (null == response) return lableGroups;
@SuppressWarnings("unchecked")
SimpleOrderedMap<SolrDocumentList> solrDocumentLists = (SimpleOrderedMap<SolrDocumentList>) response.getResponse().get("moreLikeThis");
for (int i=0; i<solrDocumentLists.size(); i++) {
SolrDocumentList solrDocumentList = solrDocumentLists.getVal(i);
for (SolrDocument doc : solrDocumentList) {
String doc_id = doc.getFieldValue("id").toString();
String doc_title = doc.getFieldValue("title").toString();
String doc_url = doc.getFieldValue("url").toString();
String doc_score = doc.getFieldValue("score").toString();
// 判断相似度
if (Double.parseDouble(doc_score) > 1) {
LableGroup lableGroup = new LableGroup();
lableGroup.setId(doc_id);
lableGroup.setTitle(doc_title);
lableGroup.setUrl(doc_url);
lableGroups.add(lableGroup);
}
}
}
} catch (Exception e) {
log.error("获取相似性数据error", e);
}
return lableGroups;
}
注:
id:文档唯一主键;
fl:需要返回的字段;
mlt:在查询时,打开/关闭(MoreLikeThisComponent)的布尔值。
mlt.fl:根据哪些字段判断相似度;
mlt.mindf:最小文档频率,所在文档的个数小于这个值的词将不用于相似判断;
mlt.mintf:最小分词频率,在单个文档中出现频率小于这个值的词将不用于相似判断;
mlt.count:返回相似文章个数;