Lucene入门案例 - 2
1. 添加分析器步骤
- 创建一个Analyzer对象,StandardAnalyzer对象
- 使用分析器对象的tokenStream方法获得一个TokenStream对象
- 向TokenStream对象中设置一个引用(CharTermAttribute),相当于一个指针
- 调用TokenStream对象的reset方法,如果不调用会抛异常
- 使用while循环遍历TokenStream对象
- 关闭TokenStream对象
2. 代码如下:
/**
 * Tokenizes a sample English sentence with the standard analyzer and prints
 * every term it produces, one per line.
 *
 * <p>Both the analyzer and the token stream are opened in a
 * try-with-resources statement so they are closed even if tokenizing fails.
 *
 * @throws Exception if the token stream cannot be created, consumed or closed
 */
@Test
public void testTokenStream() throws Exception {
    // Create the Analyzer (a StandardAnalyzer) and obtain a TokenStream from it.
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream tokenStream = analyzer.tokenStream("",
                 "The Spring Framework provides a comprehensive programming and configuration model for modern")) {
        // Register a term attribute on the stream; it acts like a cursor that
        // exposes the text of the current token.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // reset() must be called before the first incrementToken(); otherwise an exception is thrown.
        tokenStream.reset();
        // Walk the stream token by token.
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        // Signal end-of-stream before the stream is closed.
        tokenStream.end();
    }
}
分析结果:
尝试检索中文
// Same tokenStream(...) call as in testTokenStream, but fed a sentence that
// contains Chinese text, to see how the analyzer splits it.
TokenStream tokenStream = analyzer.tokenStream("",
"创建一个Analyzer对象,StandardAnalyzer对象");
结果:
很明显我们需要的是关键词的分析而不是单个文字
3.中文分析器(IK-Analyzer)
- 添加jar包(IK-Analyzer-1.0-SNAPSHOT.jar)
- 把配置文件和扩展词典添加到工程的classpath下
注意:扩展词典严禁使用Windows记事本编辑(记事本会保存为UTF-8+BOM),需保证编码格式为不带BOM的UTF-8
- 配置文件:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!-- Users can configure their own extension dictionary here -->
<entry key="ext_dict">hotword.dic;</entry>
<!-- Users can configure their own extension stop-word dictionary here -->
<entry key="ext_stopwords">stopword.dic;</entry>
</properties>
改良案例 1的代码
// Before: the writer was built from an IndexWriterConfig created with its no-arg constructor.
//IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig());
// After: the IndexWriterConfig is given an IKAnalyzer, and the writer is built from that config.
IndexWriterConfig config = new IndexWriterConfig(new IKAnalyzer());
IndexWriter indexWriter = new IndexWriter(directory, config);
分析效果:
4. Field的属性
入门案例1的改良:
// Before: the path was added as a TextField with Field.Store.YES.
//Field fieldPath = new TextField("path", filePath, Field.Store.YES);
// After: the path is added as a StoredField only.
Field fieldPath = new StoredField("path", filePath);
// Before: the size was turned into a string and added as a TextField.
//Field fieldSize = new TextField("size", fileSize + "", Field.Store.YES);
// After: two fields share the name "size":
// a LongPoint, the type that LongPoint.newRangeQuery("size", ...) searches against ...
Field fieldSizeValue = new LongPoint("size", fileSize);
// ... and a StoredField, presumably so the value can be read back with document.get("size").
Field fieldSizeStore = new StoredField("size", fileSize);
5. 删除索引文档
代码:
/**
 * Deletes all documents from the index, then closes the writer.
 *
 * @throws Exception if the delete or the close fails
 */
@Test
public void delectDocument() throws Exception {
    try {
        indexWriter.deleteAll();
    } finally {
        // Close the writer even when the delete fails, so it is never left open.
        indexWriter.close();
    }
}
/**
 * Deletes the documents selected by the term {@code name:apache}, then closes
 * the writer.
 *
 * <p>Despite the method name, the selection is made with a {@code Term},
 * not a {@code Query}.
 *
 * @throws Exception if the delete or the close fails
 */
@Test
public void delectDocumentByQuery() throws Exception {
    try {
        indexWriter.deleteDocuments(new Term("name", "apache"));
    } finally {
        // Close the writer even when the delete fails, so it is never left open.
        indexWriter.close();
    }
}
6. 更新索引文档
/**
 * Replaces the documents selected by the term {@code name:spring} with a
 * newly built document, then closes the writer.
 *
 * @throws Exception if the update or the close fails
 */
@Test
public void updateDocument() throws Exception {
    // Build the replacement document: three stored text fields with the same value.
    Document document = new Document();
    document.add(new TextField("name", "更新后的文档", Field.Store.YES));
    document.add(new TextField("name1", "更新后的文档", Field.Store.YES));
    document.add(new TextField("name2", "更新后的文档", Field.Store.YES));
    try {
        // updateDocument first deletes the matching documents, then adds the new one.
        indexWriter.updateDocument(new Term("name", "spring"), document);
    } finally {
        // Close the writer even when the update fails, so it is never left open.
        indexWriter.close();
    }
}
7. 查询索引文档数值范围(根据RangeQuery查询)
package com.allen.lucene;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
/**
 * JUnit examples that query an existing Lucene index stored on disk.
 *
 * <p>A fresh {@link IndexReader} and {@link IndexSearcher} are opened before
 * every test by {@link #init()}; the reader is closed again by
 * {@link #printResult(Query)}.
 */
public class SearchIndex {

    /** Reader over the on-disk index; opened in {@link #init()}. */
    private IndexReader indexReader;

    /** Searcher built on top of {@link #indexReader}. */
    private IndexSearcher indexSearcher;

    /**
     * Opens the index directory and creates the searcher used by the tests.
     *
     * @throws Exception if the index directory cannot be opened
     */
    @Before
    public void init() throws Exception {
        indexReader = DirectoryReader.open(FSDirectory.open(new File("E:\\Desktop\\Director").toPath()));
        indexSearcher = new IndexSearcher(indexReader);
    }

    /**
     * Searches the index with a numeric range query on the {@code size} field
     * (bounds 0 and 100) and prints the matching documents.
     *
     * @throws Exception if the search fails
     */
    @Test
    public void testRangeQuery() throws Exception {
        // Build the Query object.
        Query query = LongPoint.newRangeQuery("size", 0, 100);
        printResult(query);
    }

    /**
     * Shared helper: runs {@code query}, prints the total hit count followed by
     * the {@code name}, {@code path} and {@code size} values of up to 10 hits,
     * and finally closes the index reader.
     *
     * <p>Because the reader is closed here, this helper can be called only
     * once per test. The close is in a {@code finally} block so the reader is
     * released even when the search or a document lookup throws.
     *
     * @param query the query to execute
     * @throws Exception if searching, loading a document or closing the reader fails
     */
    private void printResult(Query query) throws Exception {
        try {
            // Execute the query, keeping at most the top 10 hits.
            TopDocs topDocs = indexSearcher.search(query, 10);
            System.out.println("总记录数: " + topDocs.totalHits);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Id of the matching document.
                int docId = scoreDoc.doc;
                // Load the document from the searcher by its id.
                Document document = indexSearcher.doc(docId);
                System.out.println(document.get("name"));
                System.out.println(document.get("path"));
                System.out.println(document.get("size"));
                System.out.println("----------------");
            }
        } finally {
            indexReader.close();
        }
    }
}