这里使用的是Lucene 4.7.0，与Lucene 3.x稍有不同。
有下面三段内容，我想对与“船”相关的一系列词（boat、ship、vessel）的搜索结果进行加分：
bike car jeep truck bus boat
train car ship boat van subway
car plane taxi boat vessel railway
- 定义自定义的MyAnalyzer,实现对字段的有效载荷进行赋值
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
package com.pera.lucene.score.payload;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.PayloadEncoder;
import org.apache.lucene.util.Version;

/**
 * Analyzer that splits field text on whitespace and routes every token through
 * {@link MyTokenFilter}, handing the filter the payload encoder given at construction.
 */
public class MyAnalyzer extends Analyzer
{

    private PayloadEncoder encoder;

    /**
     * @param encoder encoder passed on to each {@link MyTokenFilter} this analyzer creates
     */
    MyAnalyzer(PayloadEncoder encoder)
    {
        this.encoder = encoder;
    }

    /**
     * Builds the analysis chain: whitespace tokenizer followed by the payload filter.
     *
     * @param fieldName name of the field being analyzed (not used by this chain)
     * @param reader    source of the field text
     * @return components whose source is the tokenizer and whose sink is the payload filter
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader)
    {
        // Splits the space-separated category names into individual tokens.
        Tokenizer whitespaceSource = new WhitespaceTokenizer(Version.LUCENE_47, reader);

        // The custom filter sets the payload of each token as it passes through.
        return new TokenStreamComponents(whitespaceSource, new MyTokenFilter(whitespaceSource, this.encoder));
    }

}
- 自定义TokenFilter，用来取得词项的Payload值，或根据词项内容分析后为其设置Payload值
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
package com.pera.lucene.score.payload;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.payloads.PayloadEncoder;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;

/**
 * Token filter that sets a payload on every token whose text is a key of
 * {@code App.scoreMap}, and clears the payload on every other token.
 */
public class MyTokenFilter extends TokenFilter
{
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
    private final PayloadEncoder encoder;

    /**
     * @param input   upstream token stream
     * @param encoder encoder used to turn the mapped score text into a payload
     */
    public MyTokenFilter(TokenStream input, PayloadEncoder encoder)
    {
        super(input);
        this.encoder = encoder;
    }

    /**
     * Advances the upstream stream by one token and sets that token's payload.
     *
     * @return {@code true} if a token was produced, {@code false} once the input is exhausted
     * @throws IOException if the upstream stream fails
     */
    @Override
    public boolean incrementToken() throws IOException
    {
        if (!input.incrementToken())
        {
            return false;
        }

        // scoreMap is an ImmutableMap and holds no null values, so a null result
        // means the term has no configured score.
        String mappedScore = App.scoreMap.get(termAtt.toString());
        if (mappedScore == null)
        {
            payAtt.setPayload(null);
        }
        else
        {
            payAtt.setPayload(encoder.encode(mappedScore.toCharArray()));
        }
        return true;
    }

}
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
1 public static ImmutableMap<String, String> scoreMap = ImmutableMap.of("boat", "5f", "ship", "20f", "vessel", "100f");
- 自定义PayloadSimilarity继承DefaultSimilarity，重写scorePayload方法，在检索时取得之前设置的Payload值
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
1 package com.pera.lucene.score.payload; 2 3 import org.apache.lucene.analysis.payloads.PayloadHelper; 4 import org.apache.lucene.search.similarities.DefaultSimilarity; 5 import org.apache.lucene.util.BytesRef; 6 7 public class PayloadSimilarity extends DefaultSimilarity 8 { 9 @Override 10 public float scorePayload(int doc, int start, int end, BytesRef payload) 11 { 12 return PayloadHelper.decodeFloat(payload.bytes); 13 } 14 }
- 建立索引 需要将之前定义的Analyzer和PayloadSimilarity设置到Config中
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
package com.pera.lucene.score.payload;

import java.io.File;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.payloads.FloatEncoder;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds the sample index: three documents with a single "tools" field, analyzed
 * with {@link MyAnalyzer} and written with {@link PayloadSimilarity} configured.
 */
public class Indexing
{
    /**
     * Recreates the index at {@code App.indexPath} and adds the three sample documents.
     * The writer and the directory are closed even when indexing fails.
     *
     * @throws IOException if the index cannot be opened, written or closed
     */
    public void indexPayload() throws IOException
    {
        Directory dir = FSDirectory.open(new File(App.indexPath));
        try
        {
            Analyzer analyzer = new MyAnalyzer(new FloatEncoder());
            Similarity similarity = new PayloadSimilarity();

            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
            // CREATE replaces any index already present at the path.
            iwc.setOpenMode(OpenMode.CREATE).setSimilarity(similarity);
            Date start = new Date();
            System.out.println("Indexing to directory '" + App.indexPath + "'...");

            IndexWriter writer = new IndexWriter(dir, iwc);
            try
            {
                addToolsDocument(writer, "bike car jeep truck bus boat");
                addToolsDocument(writer, "train car ship boat van subway");
                addToolsDocument(writer, "car plane taxi boat vessel railway");
            }
            finally
            {
                // Close the writer on failure too, so it does not stay open.
                writer.close();
            }

            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        }
        finally
        {
            dir.close();
        }
    }

    /** Adds one document whose stored, analyzed "tools" field holds the given text. */
    private void addToolsDocument(IndexWriter writer, String tools) throws IOException
    {
        Document doc = new Document();
        doc.add(new TextField("tools", tools, Store.YES));
        writer.addDocument(doc);
    }
}
- 进行检索 检索时要将PayloadSimilarity设置到searcher中
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
package com.pera.lucene.score.payload;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.payloads.AveragePayloadFunction;
import org.apache.lucene.search.payloads.PayloadTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs the sample payload query against the index at {@code App.indexPath} and
 * prints every hit together with its score explanation.
 */
public class Searching
{

    /**
     * Searches the "tools" field for ship, boat or vessel using
     * {@link PayloadSimilarity} and prints the top 10 hits to standard output.
     * The reader and the directory are closed even when the search fails.
     *
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException declared for callers; not thrown by the code in this method
     */
    public void searchPayload() throws IOException, ParseException
    {
        Directory dir = FSDirectory.open(new File(App.indexPath));
        try
        {
            IndexReader reader = DirectoryReader.open(dir);
            try
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                // Install the custom PayloadSimilarity on the searcher.
                searcher.setSimilarity(new PayloadSimilarity());

                BooleanQuery bq = buildQuery();
                TopDocs results = searcher.search(bq, 10);
                ScoreDoc[] hits = results.scoreDocs;

                int numTotalHits = results.totalHits;
                System.out.println(numTotalHits + " total matching documents");

                for (int i = 0; i < hits.length; i++)
                {
                    int docId = hits[i].doc; // document number
                    float lucene_score = hits[i].score;
                    String tools = searcher.doc(docId).get("tools");
                    System.out.println("DocId:" + docId + "\tLucene Score:" + lucene_score + "\tTools:" + tools);
                    Explanation explanation = searcher.explain(bq, docId);
                    System.out.println(explanation.toString());
                }
            }
            finally
            {
                reader.close();
            }
        }
        finally
        {
            dir.close();
        }
    }

    /** Builds the SHOULD-query over the three boosted terms, in the order ship, boat, vessel. */
    private BooleanQuery buildQuery()
    {
        BooleanQuery bq = new BooleanQuery();
        String[] terms = { "ship", "boat", "vessel" };
        for (int i = 0; i < terms.length; i++)
        {
            bq.add(new PayloadTermQuery(new Term("tools", terms[i]), new AveragePayloadFunction()), Occur.SHOULD);
        }
        return bq;
    }
}
- 检索结果：可以看到，由于设置了Payload值，Doc2的排名得到了提升
3 total matching documents
DocId:2 Lucene Score:16.750757 Tools:car plane taxi boat vessel railway
16.750757 = (MATCH) product of:
25.126135 = (MATCH) sum of:
0.3186112 = (MATCH) btq, product of:
0.06372224 = weight(tools:boat in 2) [PayloadSimilarity], result of:
0.06372224 = score(doc=2,freq=0.5 = phraseFreq=0.5
), product of:
0.33736566 = queryWeight, product of:
0.71231794 = idf(docFreq=3, maxDocs=3)
0.4736167 = queryNorm
0.18888181 = fieldWeight in 2, product of:
0.70710677 = tf(freq=0.5), with freq of:
0.5 = phraseFreq=0.5
0.71231794 = idf(docFreq=3, maxDocs=3)
0.375 = fieldNorm(doc=2)
5.0 = AveragePayloadFunction.docScore()
24.807524 = (MATCH) btq, product of:
0.24807523 = weight(tools:vessel in 2) [PayloadSimilarity], result of:
0.24807523 = score(doc=2,freq=0.5 = phraseFreq=0.5
), product of:
0.66565174 = queryWeight, product of:
1.4054651 = idf(docFreq=1, maxDocs=3)
0.4736167 = queryNorm
0.37268022 = fieldWeight in 2, product of:
0.70710677 = tf(freq=0.5), with freq of:
0.5 = phraseFreq=0.5
1.4054651 = idf(docFreq=1, maxDocs=3)
0.375 = fieldNorm(doc=2)
100.0 = AveragePayloadFunction.docScore()
0.6666667 = coord(2/3)
DocId:1 Lucene Score:3.5200772 Tools:train car ship boat van subway
3.5200772 = (MATCH) product of:
5.2801156 = (MATCH) sum of:
4.9615045 = (MATCH) btq, product of:
0.24807523 = weight(tools:ship in 1) [PayloadSimilarity], result of:
0.24807523 = score(doc=1,freq=0.5 = phraseFreq=0.5
), product of:
0.66565174 = queryWeight, product of:
1.4054651 = idf(docFreq=1, maxDocs=3)
0.4736167 = queryNorm
0.37268022 = fieldWeight in 1, product of:
0.70710677 = tf(freq=0.5), with freq of:
0.5 = phraseFreq=0.5
1.4054651 = idf(docFreq=1, maxDocs=3)
0.375 = fieldNorm(doc=1)
20.0 = AveragePayloadFunction.docScore()
0.3186112 = (MATCH) btq, product of:
0.06372224 = weight(tools:boat in 1) [PayloadSimilarity], result of:
0.06372224 = score(doc=1,freq=0.5 = phraseFreq=0.5
), product of:
0.33736566 = queryWeight, product of:
0.71231794 = idf(docFreq=3, maxDocs=3)
0.4736167 = queryNorm
0.18888181 = fieldWeight in 1, product of:
0.70710677 = tf(freq=0.5), with freq of:
0.5 = phraseFreq=0.5
0.71231794 = idf(docFreq=3, maxDocs=3)
0.375 = fieldNorm(doc=1)
5.0 = AveragePayloadFunction.docScore()
0.6666667 = coord(2/3)
DocId:0 Lucene Score:0.106203735 Tools:bike car jeep truck bus boat
0.106203735 = (MATCH) product of:
0.3186112 = (MATCH) sum of:
0.3186112 = (MATCH) btq, product of:
0.06372224 = weight(tools:boat in 0) [PayloadSimilarity], result of:
0.06372224 = score(doc=0,freq=0.5 = phraseFreq=0.5
), product of:
0.33736566 = queryWeight, product of:
0.71231794 = idf(docFreq=3, maxDocs=3)
0.4736167 = queryNorm
0.18888181 = fieldWeight in 0, product of:
0.70710677 = tf(freq=0.5), with freq of:
0.5 = phraseFreq=0.5
0.71231794 = idf(docFreq=3, maxDocs=3)
0.375 = fieldNorm(doc=0)
5.0 = AveragePayloadFunction.docScore()
0.33333334 = coord(1/3)