/**
 * Builds the Lucene index under {@code searchDir} from the given contents.
 *
 * <p>The analyzer is a {@code SmartChineseAnalyzer} whose stop-word set is the union of the
 * lines of the classpath resource {@code /stopword.dic} (when present) and the analyzer's
 * default stop set. All changes are committed in one step after every document has been
 * added; if anything fails before that, the writer is closed without committing, so the
 * previously committed index is left as it was and the write lock is released.
 *
 * <p>Any exception is caught and its stack trace printed; nothing is propagated to the caller.
 *
 * @param contents  records to index; when {@code null} the method returns without touching the index
 * @param searchDir directory holding the index files; created when it does not exist
 * @param hasDelete when {@code true}, all existing documents are deleted before the new ones are added
 */
public static void createIndex(List<Content> contents, String searchDir, boolean hasDelete) {
    if (contents == null) {
        return;
    }
    try {
        // Locate (and if necessary create) the index directory.
        Path dirPath = Paths.get(searchDir);
        if (!Files.exists(dirPath)) {
            Files.createDirectories(dirPath);
        }

        // Custom stop words, one per line; a missing resource simply means "no custom words".
        CharArraySet cas = new CharArraySet(0, true);
        java.net.URL stopDicUrl = LuceneUtils.class.getResource("/stopword.dic");
        if (stopDicUrl != null) {
            List<String> lines = FileUtils.readLines(new File(stopDicUrl.getFile()), Constant.CHARSET);
            if (lines != null) {
                for (String line : lines) {
                    String word = line.trim();
                    if (!word.isEmpty()) {
                        cas.add(word);
                    }
                }
            }
        }
        // Add the analyzer's built-in stop words as well.
        for (Object defaultStopWord : SmartChineseAnalyzer.getDefaultStopSet()) {
            cas.add(defaultStopWord);
        }

        // Resources are closed in reverse order (writer, analyzer, directory) on every path.
        try (Directory directory = FSDirectory.open(dirPath);
             Analyzer analyzer = new SmartChineseAnalyzer(cas)) {
            IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
            // With commit-on-close disabled, close() discards uncommitted changes, so a failure
            // part-way through never publishes a half-built index.
            iwConfig.setCommitOnClose(false);
            try (IndexWriter iwriter = new IndexWriter(directory, iwConfig)) {
                if (hasDelete) {
                    // Drop the previous index content and rebuild from scratch.
                    iwriter.deleteAll();
                }
                for (Content content : contents) {
                    if (content == null) {
                        continue;
                    }
                    Document doc = new Document();
                    doc.add(new StringField("id", String.valueOf(content.getId()), Store.YES));
                    addFieldIfPresent(doc, "platform",
                            content.getPlatform() == null ? null : content.getPlatform().name(), true);
                    addFieldIfPresent(doc, "sname", content.getSname(), true);
                    addFieldIfPresent(doc, "introduction", content.getIntroduction(), true);
                    addFieldIfPresent(doc, "actor", content.getActor(), true);
                    addFieldIfPresent(doc, "director", content.getDirector(), true);
                    addFieldIfPresent(doc, "picture", content.getPicture(), false);
                    addFieldIfPresent(doc, "program_code", content.getProgramCode(), false);
                    addFieldIfPresent(doc, "category_code", content.getCategoryCode(), true);
                    // A content without a classification is indexed without a "category" field.
                    addFieldIfPresent(doc, "category",
                            content.getClassify() == null ? null : content.getClassify().getSname(), true);
                    addFieldIfPresent(doc, "dtype",
                            content.getDtype() == null ? null : content.getDtype().name(), false);
                    iwriter.addDocument(doc);
                }
                iwriter.commit();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Adds a stored field to {@code doc} unless {@code value} is {@code null} (Lucene fields reject
 * null values).
 *
 * @param tokenized {@code true} for an analyzed {@code TextField}, {@code false} for an
 *                  exact-match {@code StringField}
 */
private static void addFieldIfPresent(Document doc, String name, String value, boolean tokenized) {
    if (value == null) {
        return;
    }
    if (tokenized) {
        doc.add(new TextField(name, value, Store.YES));
    } else {
        doc.add(new StringField(name, value, Store.YES));
    }
}
}
以上是建立索引。需要注意的地方:1:停用词(该词不进行索引)和分词器(“我不是药神”分为“我”、“我不是”、“药神”、“我不是药神”)的概念。
2:StringField(不分词)和TextField(分词)的区别。
下面是读取索引:
/**
 * Searches the Lucene index for contents whose {@code sname} field matches {@code keyword},
 * restricted to one platform.
 *
 * <p>The index location is read from the {@code Constant.LUCENE_INDEX_FOLDER} configuration
 * value. Any exception raised while opening or querying the index is caught and its stack
 * trace printed; the hits collected up to that point (possibly none) are returned.
 *
 * @param keyword  query text, parsed with Lucene query syntax; a blank keyword yields an empty list
 * @param formType platform to restrict the search to; {@code null} defaults to {@code PlatformType.HW}
 * @param quantity maximum number of hits requested from Lucene; a non-positive value yields an empty list
 * @return the matching contents, never {@code null}; may hold fewer than {@code quantity} entries
 */
public static List<Content> searchIndex(String keyword, PlatformType formType, int quantity) {
    String indexPath = ConfigUtils.getValue(Constant.LUCENE_INDEX_FOLDER);
    final PlatformType wantedPlatform = (formType == null) ? PlatformType.HW : formType;
    List<Content> contents = new ArrayList<Content>();
    Path path = Paths.get(indexPath);

    // Both cases could only ever end in an empty result (via a parse / argument exception),
    // so skip opening the index altogether.
    if (!StringUtils.isNotBlank(keyword) || quantity <= 0) {
        return contents;
    }

    // Resources are closed in reverse order (analyzer, reader, directory) on every path.
    try (FSDirectory indexDir = FSDirectory.open(path);
         DirectoryReader reader = DirectoryReader.open(indexDir);
         Analyzer analyzer = new SmartChineseAnalyzer()) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // NOTE(review): keyword is parsed as raw query syntax, so special characters may raise a
        // ParseException (reported below, empty result). Escape upstream if keyword is free-form input.
        String[] fields = {"sname"};
        String[] stringQuery = {keyword};
        Query multiQuery = MultiFieldQueryParser.parse(stringQuery, fields, analyzer);

        BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
        booleanQuery.add(multiQuery, Occur.MUST);

        // "platform" is indexed as an analyzed field, so the restriction goes through the analyzer too.
        QueryBuilder queryBuilder = new QueryBuilder(analyzer);
        Query limitQuery = queryBuilder.createBooleanQuery("platform", wantedPlatform.name(), Occur.MUST);
        if (limitQuery != null) {
            // createBooleanQuery yields null when analysis produces no terms; the per-hit
            // platform check below still enforces the restriction in that case.
            booleanQuery.add(limitQuery, Occur.MUST);
        }

        TopDocs hits = searcher.search(booleanQuery.build(), quantity);
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);

            PlatformType platform = null;
            String platformStr = doc.get("platform");
            if (StringUtils.isNotBlank(platformStr)) {
                platform = PlatformType.valueOf(platformStr);
                if (wantedPlatform != platform) {
                    continue;
                }
            }

            Content content = new Content();
            String dtypeStr = doc.get("dtype");
            if (StringUtils.isNotBlank(dtypeStr)) {
                content.setDtype(MediaType.valueOf(dtypeStr));
            }
            content.setId(Integer.parseInt(doc.get("id")));
            content.setActor(doc.get("actor"));
            content.setIntroduction(doc.get("introduction"));
            content.setSname(doc.get("sname"));
            content.setPicture(doc.get("picture"));
            content.setCategoryCode(doc.get("category_code"));
            String category = doc.get("category");
            if (StringUtils.isNotBlank(category)) {
                content.setClassify(new Classify());
                content.getClassify().setSname(category);
            }
            content.setPlatform(platform);
            contents.add(content);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return contents;
}
所需要的依赖(建立索引):
<!-- Import Lucene -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-sandbox</artifactId>
<version>${lucene.version}</version>
</dependency>
读取索引的依赖
<!-- Import Lucene -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-sandbox</artifactId>
<version>${lucene.version}</version>
</dependency>
注释:1:关于Files等用于判断路径的工具类,视具体情况而定;判断文件的工具类很多(可在网上搜索)。文件存放的路径也视情况而定。
2:涉及的lucene版本为<lucene.version>7.5.0</lucene.version>