/**
 * Ad-hoc diagnostic: checks whether Solr holds topics with similar titles that were not
 * clustered together.
 *
 * <p>Queries the collection named by {@code SolrHelper.COLLECTION_CLUSTER_TOPICEVENT} with a
 * fixed filter and time window, sorted by CREATETIME descending. For every returned document
 * it takes the middle three fifths of the TITLE and reports each later document in the result
 * list whose TITLE contains that fragment, along with the cosine of their TFIDF and
 * TFIDFCONTENT maps as computed by {@code MathUtil.mapcosine}. One report per source document
 * is printed to standard output.
 *
 * <p>Documents without a TITLE are skipped. A source document whose middle fragment is empty
 * (TITLE of length 0 or 1) is not compared against anything, because an empty fragment is
 * contained in every string and would flag every other document as similar.
 *
 * @param args unused
 */
public static void main(String[] args) {
    SolrServer server1 = null;
    try {
        server1 = SolrHelper.getCloudSolrServer(SolrHelper.COLLECTION_CLUSTER_TOPICEVENT);
        String q = "DATASOURCEID:XZ AND PAGETYPE:4 AND DOCTYPE:2 AND CREATETIME:[20190730160000 TO 20190731000000]";
        List<Map<String, Object>> mapList = SolrHelper.queryVagueObject(server1, q, null, null, null,
                "CREATETIME", "DESC");
        System.out.println("solr查询条数:" + mapList.size());

        int nextIndex = 0;
        for (Map<String, Object> map : mapList) {
            // Index of the first document after the current one; only later documents are
            // compared, so each pair is examined once and a document never matches itself.
            nextIndex++;

            Object rawTitle = map.get("TITLE");
            if (rawTitle == null) {
                // A single document without a title must not abort the whole scan.
                continue;
            }
            String title = rawTitle.toString();
            int len = title.length();
            // Middle three fifths of the title, used as the containment probe.
            String title1 = title.substring(len / 5, (4 * len) / 5);

            StringBuilder report = new StringBuilder();
            // String.contains("") is always true, so an empty probe would match everything.
            if (!title1.isEmpty()) {
                // Source vectors are parsed lazily, on the first match only.
                LinkedHashMap<String, Double> tfidfcontent = null;
                LinkedHashMap<String, Double> tfidf = null;
                boolean headerPrinted = false;

                for (Map<String, Object> mapsub : mapList.subList(nextIndex, mapList.size())) {
                    Object rawSubTitle = mapsub.get("TITLE");
                    if (rawSubTitle == null || !rawSubTitle.toString().contains(title1)) {
                        continue;
                    }
                    if (!headerPrinted) {
                        report.append("\n源key:").append(map.get("id"));
                        report.append("\n源TITLE:").append(title);
                        report.append("\n3/5源TITLE1:").append(title1);
                        report.append("\n源更新时间:").append(map.get("UPDATETIME"));
                        report.append("\n源创建时间:").append(map.get("CREATETIME"));
                        report.append("\n源发布时间:").append(map.get("TIME"));
                        headerPrinted = true;
                        tfidfcontent = parseTfidfJson(map.get("TFIDFCONTENT"));
                        tfidf = parseTfidfJson(map.get("TFIDF"));
                    }
                    LinkedHashMap<String, Double> subtfidfcontent = parseTfidfJson(mapsub.get("TFIDFCONTENT"));
                    LinkedHashMap<String, Double> subtfidf = parseTfidfJson(mapsub.get("TFIDF"));
                    report.append("\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                    report.append("\n相似key:").append(mapsub.get("id").toString());
                    report.append("\n相似更新时间:").append(mapsub.get("UPDATETIME"));
                    report.append("\n相似创建时间:").append(mapsub.get("CREATETIME"));
                    report.append("\n相似发布时间:").append(mapsub.get("TIME"));
                    report.append("\n标题相识度:").append(MathUtil.mapcosine(tfidf, subtfidf));
                    report.append("\n内容相似度:").append(MathUtil.mapcosine(tfidfcontent, subtfidfcontent));
                }
            }
            // The separator is emitted for every titled document, matched or not.
            report.append("\n###########################################################");
            System.out.println(report);
        }
    } catch (Exception e) {
        // Top-level boundary of a throwaway diagnostic: report the failure and fall through to cleanup.
        e.printStackTrace();
    } finally {
        SolrHelper.free(server1);
    }
}

/**
 * Parses the JSON text of a TFIDF / TFIDFCONTENT field into a {@code LinkedHashMap}.
 *
 * @param fieldValue the raw field value; its {@code toString()} is handed to
 *                   {@code JsonUtil.jsonToObj}. Must not be null.
 * @return the parsed map (presumably term to weight; element types are not verified here)
 */
@SuppressWarnings("unchecked") // generic type arguments are erased; the cast cannot be checked at runtime
private static LinkedHashMap<String, Double> parseTfidfJson(Object fieldValue) {
    return (LinkedHashMap<String, Double>) JsonUtil.jsonToObj(fieldValue.toString(), LinkedHashMap.class);
}
从solr中查询是否有相似的数据
最新推荐文章于 2021-04-28 13:38:37 发布