lucene 在广告行业中的应用
业务场景：传统的做法是将广告列表放到一个 list 中，根据广告定向条件循环遍历列表来匹配广告，效率很低，所以改为使用 Lucene 搜索来匹配广告以提高性能。具体做法是将广告数据创建为 Lucene 中的文档对象（Document），再根据广告定向条件精准匹配广告。
以下代码是从项目中抽取出来的，不必关注业务逻辑，这里只是列出一些注意点。
- 广告数据--->Document
应用到的类型: StringField、NumericDocValuesField、TextField、LongField
// Build the Lucene document for one ad and write it to the index under its id.
Document doc = new Document();

// Status / identifier fields, added as StringField and stored so they can be read back from hits.
doc.add(new StringField("id", id, Field.Store.YES));
doc.add(new StringField("adAppStatus", String.valueOf(adNativeCodeVo.getAdAppStatus()), Field.Store.YES));
doc.add(new StringField("installLimitStatus", String.valueOf(installLimitStatus), Field.Store.YES));
doc.add(new StringField("os", osStr, Field.Store.YES));

// Doc-values field; the search side sorts on "weight".
doc.add(new NumericDocValuesField("weight", adNativeCodeVo.getWeight()));

// Analyzed text fields.
doc.add(new TextField("appName", adNativeCodeVo.getAppName(), Field.Store.YES));

TextField keywordsField = new TextField("keywords", adNativeCodeVo.getKeywords(), Field.Store.YES);
keywordsField.setBoost(1.5f); // index-time boost so keyword matches weigh more
doc.add(keywordsField);

// Brand targeting. "brandInclude" is added exactly once: the original snippet added it twice,
// which stored and indexed the same value two times per document.
doc.add(new TextField("brandInclude", convertZeroToAll(adNativeCodeVo.getBrandInclude()), Field.Store.YES));
// The exclude list is optional; the field is only added when a value is present.
if (StringUtils.isNotBlank(adNativeCodeVo.getBrandExclude())) {
    doc.add(new TextField("brandExclude", convertZeroToAll(adNativeCodeVo.getBrandExclude()), Field.Store.YES));
}

// Numeric long field so the campaign start time can be matched with a numeric range query.
doc.add(new LongField("adsBeginTime", DateUtils.getTimeStamp(adNativeCodeVo.getAdsBeginTime()), Field.Store.YES));

updateDocument(id, doc);
- 查询广告
// Assemble the ad-matching query:
//   - targeting constraints are FILTER clauses (required, but they do not participate in scoring),
//   - ads that must not be returned are MUST_NOT clauses,
//   - similarity to already-installed apps is a SHOULD clause that only influences ranking.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Analyzer analyzer = new IKAnalyzer(true); // IKAnalyzer: Chinese word-segmentation package

// "adAppStatus" is the parser's default field: a term without an explicit field is looked up there.
// Author's note: for a TextField such as brandInclude, "brandInclude : key1 key2" sends key2 to the
// default field, whereas "brandInclude : key1,key2" keeps both keys in the brandInclude field.
QueryParser parser = new QueryParser("adAppStatus", analyzer);

// FILTER behaves like MUST except that the clause does not participate in scoring.
Query query1 = parser.parse("adAppStatus:1");
builder.add(query1, Occur.FILTER);

// installLimitStatus must be either 1 or 0: two SHOULD clauses wrapped in one required sub-query.
BooleanQuery.Builder installLimitBuilder = new BooleanQuery.Builder();
installLimitBuilder.add(parser.parse("installLimitStatus: 1"), Occur.SHOULD);
installLimitBuilder.add(parser.parse("installLimitStatus: 0"), Occur.SHOULD);
// Fixed: the original declared this variable as "query12" but then added the undeclared "query2".
Query query2 = installLimitBuilder.build();
builder.add(query2, Occur.FILTER);

// OS type. QueryParser.escape escapes query-syntax special characters in the request value;
// per the author's note, the comma between the two values expresses an OR relationship.
Query query3 = parser.parse("os:all," + QueryParser.escape(vo.getOs()));
builder.add(query3, Occur.FILTER);

// Numeric range query: adsBeginTime in (0, dt], i.e. lower bound exclusive, upper bound inclusive.
long dt = DateUtils.getTimeStamp(DateUtils.getShortNow() + " 00:00:00");
// Fixed: the last argument was the garbled token "t rue".
NumericRangeQuery<Long> query4 = NumericRangeQuery.newLongRange("adsBeginTime", 0L, dt, false, true);
builder.add(query4, Occur.FILTER);

// In query syntax "+" marks a required clause, while "NOT" and "-" mark a prohibited clause.
Query query5 = parser.parse("brandInclude:all," + QueryParser.escape(vo.getBrand())
        + " -brandExclude:" + QueryParser.escape(vo.getBrand()));
builder.add(query5, Occur.FILTER);

// Push list: each id in it is excluded with a MUST_NOT clause.
for (String id : vo.getPushSet()) {
    TermQuery q = new TermQuery(new Term("id", id));
    builder.add(q, Occur.MUST_NOT);
}

// MoreLikeThis: a SHOULD clause only raises ranking and cannot change which documents match,
// so ads similar to installed apps are ranked earlier.
MoreLikeThis mlt = new MoreLikeThis(this.getReader());
// Fields compared for similarity. Fixed: the original passed the single string
// "appName,codeDesc,keywords", which names one (non-existent) field instead of three.
mlt.setFieldNames(new String[]{"appName", "codeDesc", "keywords"});
mlt.setAnalyzer(analyzer); // reuse the analyzer built above instead of creating a second instance
mlt.setMinTermFreq(1);
mlt.setMinDocFreq(1);

// Install list: exclude the installed ad itself, and boost ads that resemble it.
for (String id : vo.getInstallSet()) {
    TermQuery q = new TermQuery(new Term("id", id));
    builder.add(q, Occur.MUST_NOT);
    // Fixed: the original indexed scoreDocs[0] unconditionally, which throws
    // ArrayIndexOutOfBoundsException when the installed id is not present in the index.
    TopDocs installedHit = getSearcher().search(q, 1);
    if (installedHit.scoreDocs.length > 0) {
        // NOTE(review): the doc number comes from getSearcher() but is resolved against the reader
        // given to MoreLikeThis above - confirm both refer to the same index snapshot.
        builder.add(mlt.like(installedHit.scoreDocs[0].doc), Occur.SHOULD);
    }
}

BooleanQuery bq = builder.build();
for (BooleanClause bc : bq) {
    // Log each clause together with its occur type.
    logger.debug(bc.getQuery() + " " + bc.getOccur().name());
}

// Sort by relevance score first, then by the "weight" field in descending order.
TopDocs topDocs = getSearcher().search(bq, num,
        new Sort(SortField.FIELD_SCORE, new SortField("weight", SortField.Type.LONG, true)));