一、需求
根据文件名来设置评分规则,或者根据文档的修改时间,将最近一年的评分加倍,一年以外的评分降低,等等。
二、具体实现
这里根据文件名来修改评分规则:文件名中包含“JRE”或“SYSTEM”的文档,评分乘以10,其余文档的评分除以10。重点就是怎么获取到文件名:customScore()方法有一个doc参数(文档编号),我们可以通过这个doc找到对应的文档,从而获取文件名。另外还有一点,Lucene提供了域缓存(FieldCache):某个域的值在第一次被请求时会按IndexReader加载到内存,只要这个IndexReader没有关闭,这些值就会一直保留在域缓存中,我们可以利用这个特性,按文档编号直接把fileName这个域的内容取出来。
package com.wsy;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;
/**
 * Demo of a custom Lucene scoring rule driven by a field value.
 *
 * <p>Files from a source folder are indexed, then a {@code content:java} term query is run
 * through a {@link CustomScoreQuery} whose provider multiplies the score of documents whose
 * {@code fileName} contains "JRE" or "SYSTEM" by 10 and divides every other score by 10.
 * The file name of each hit is read from Lucene's {@link FieldCache} instead of loading the
 * stored document, which is fast but keeps the field values in memory for as long as the
 * reader stays open.
 */
public class MyScoreQuery {
    /** Folder that holds the Lucene index. */
    private static final String INDEX_PATH = "E:\\Lucene\\IndexLibrary";
    /** Folder whose files are indexed. */
    private static final String SOURCE_PATH = "E:\\Lucene\\SearchSource";
    /** Name of the un-analyzed field the scoring rule looks at. */
    private static final String FILE_NAME_FIELD = "fileName";
    /** Factor by which matching documents are boosted and all others are demoted. */
    private static final float SCORE_FACTOR = 10f;

    private static final Directory directory;
    /** Shared reader; opened lazily and refreshed by {@link #currentReader()}. */
    private static IndexReader indexReader;

    static {
        try {
            directory = FSDirectory.open(new File(INDEX_PATH));
        } catch (IOException e) {
            // Nothing in this class can work without the directory, so fail at load time
            // instead of hitting a NullPointerException on first use.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Returns a reader that reflects the latest committed state of the index.
     *
     * <p>The reader is opened on first use (so the index may be created after this class is
     * loaded) and swapped for a fresh one whenever the index has changed since it was opened.
     * The previous reader is closed on swap; this demo does not reference-count readers that
     * might still be in use by another search.
     *
     * @return the current shared reader, never {@code null}
     * @throws IOException if the index does not exist yet or cannot be read
     */
    private static synchronized IndexReader currentReader() throws IOException {
        if (indexReader == null) {
            indexReader = IndexReader.open(directory);
        } else {
            // openIfChanged returns null when the existing reader is still up to date.
            IndexReader refreshed = IndexReader.openIfChanged(indexReader);
            if (refreshed != null) {
                indexReader.close();
                indexReader = refreshed;
            }
        }
        return indexReader;
    }

    /**
     * Indexes every regular file found directly inside the source folder.
     *
     * <p>Each document gets the file content, name, path, modification date, size and a
     * random {@code score} in {@code [0, 100)}. I/O failures are printed and end the run;
     * the writer is always closed.
     *
     * @param update when {@code true}, all existing documents are deleted first so the index
     *               is rebuilt from scratch; when {@code false}, documents are appended
     */
    public void index(boolean update) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (update) {
                indexWriter.deleteAll();
            }
            // listFiles() returns null when the folder is missing or is not a directory.
            File[] files = new File(SOURCE_PATH).listFiles();
            if (files == null) {
                System.err.println("Source directory is missing or unreadable: " + SOURCE_PATH);
                return;
            }
            Random random = new Random();
            for (File file : files) {
                // A sub-directory cannot be opened with FileReader and would abort the run.
                if (!file.isFile()) {
                    continue;
                }
                addFile(indexWriter, file, random.nextInt(100));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the document for one file and adds it to the index.
     *
     * @param indexWriter open writer to add the document to
     * @param file        regular file to index
     * @param score       value stored in the numeric {@code score} field
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private static void addFile(IndexWriter indexWriter, File file, int score) throws IOException {
        FileReader content = new FileReader(file);
        try {
            Document document = new Document();
            document.add(new Field("content", content));
            document.add(new Field(FILE_NAME_FIELD, file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
            // Clamp instead of casting blindly so files over 2 GiB do not wrap to a negative size.
            int size = (int) Math.min(file.length(), Integer.MAX_VALUE);
            document.add(new NumericField("size", Field.Store.YES, true).setIntValue(size));
            document.add(new NumericField("score", Field.Store.YES, true).setIntValue(score));
            indexWriter.addDocument(document);
        } finally {
            // The content is tokenized while addDocument runs, so the reader is no longer
            // needed here; closing it also covers the path where adding the document failed.
            content.close();
        }
    }

    /**
     * Runs the {@code content:java} term query with the file-name scoring rule applied and
     * prints doc id, score, file name, stored score and size for up to 100 hits.
     */
    public void searchByFileScoreQuery() {
        IndexSearcher indexSearcher = null;
        try {
            indexSearcher = new IndexSearcher(currentReader());
            Query query = new TermQuery(new Term("content", "java"));
            // Wrap the original query so its score passes through the file-name rule.
            FileNameScoreQuery fileNameScoreQuery = new FileNameScoreQuery(query);
            TopDocs topDocs = indexSearcher.search(fileNameScoreQuery, 100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(scoreDoc.doc + "-->" + scoreDoc.score + "-->" + document.get("fileName") + "-->" + document.get("score") + "-->" + document.get("size"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Custom-score query that plugs in {@link FileNameScoreProvider} for each reader. */
    private static class FileNameScoreQuery extends CustomScoreQuery {
        public FileNameScoreQuery(Query subQuery) {
            super(subQuery);
        }

        /**
         * Creates the provider for the reader Lucene is currently scoring against.
         *
         * <p>The {@code reader} argument must be passed on as-is: the doc ids later handed
         * to {@code customScore} are relative to this reader, not to the top-level one.
         */
        @Override
        protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
            return new FileNameScoreProvider(reader);
        }
    }

    /** Rescales the sub-query score depending on the document's file name. */
    private static class FileNameScoreProvider extends CustomScoreProvider {
        /** File name per doc id of the reader given to the constructor. */
        private final String[] fileNames;

        /**
         * @param reader reader whose doc ids will be passed to {@link #customScore}
         * @throws IOException if the file-name values cannot be loaded from the field cache
         */
        public FileNameScoreProvider(IndexReader reader) throws IOException {
            super(reader);
            // Load the field values for this reader from the field cache; they stay cached
            // until the reader is closed (fast lookups at the cost of memory).
            fileNames = FieldCache.DEFAULT.getStrings(reader, FILE_NAME_FIELD);
        }

        /**
         * @param doc           doc id relative to this provider's reader
         * @param subQueryScore score computed by the wrapped query
         * @param valSrcScore   unused; no value-source query is attached
         * @return {@code subQueryScore * 10} when the file name contains "JRE" or "SYSTEM",
         *         otherwise {@code subQueryScore / 10}
         */
        @Override
        public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
            String fileName = fileNames[doc];
            // A document without a fileName value is treated like any non-matching file.
            if (fileName != null && (fileName.contains("JRE") || fileName.contains("SYSTEM"))) {
                return SCORE_FACTOR * subQueryScore;
            }
            return subQueryScore / SCORE_FACTOR;
        }
    }

    public static void main(String[] args) {
        MyScoreQuery myScoreQuery = new MyScoreQuery();
        myScoreQuery.index(true);
        myScoreQuery.searchByFileScoreQuery();
    }
}