Lucene全文检索
创建索引库
public class LucenceFirst {
/**
 * Builds a Lucene index from every regular file found directly inside the
 * source directory.
 *
 * <p>For each file one {@code Document} is written with these fields:
 * <ul>
 *   <li>{@code name} - file name, analyzed and stored</li>
 *   <li>{@code path} - file path, stored only</li>
 *   <li>{@code content} - file text read as UTF-8, analyzed and stored</li>
 *   <li>{@code size} - file size in bytes, added twice: as a {@code LongPoint}
 *       and as a {@code StoredField}</li>
 * </ul>
 *
 * @throws IllegalStateException if the source directory cannot be listed
 * @throws Exception if opening the index or reading/indexing a file fails
 */
@Test
public void createIndex() throws Exception {
    File dir = new File("资源所在位置");
    File[] files = dir.listFiles();
    // listFiles() returns null when the path is missing, is not a directory,
    // or cannot be read; fail with a clear message instead of an NPE below.
    if (files == null) {
        throw new IllegalStateException("Cannot list source directory: " + dir.getAbsolutePath());
    }

    // try-with-resources guarantees the writer (and the write lock it holds)
    // and the directory are released even when a file fails to index.
    try (Directory directory = FSDirectory.open(new File("要创建的索引库位置").toPath());
         IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()))) {
        for (File file : files) {
            // Subdirectories cannot be read as text; index regular files only.
            if (!file.isFile()) {
                continue;
            }

            String fileName = file.getName();
            String filePath = file.getPath();
            String fileContent = FileUtils.readFileToString(file, "utf-8");
            long fileSize = FileUtils.sizeOf(file);

            Document document = new Document();
            // The size is added under the same field name twice: once as a
            // StoredField and once as a LongPoint.
            document.add(new StoredField("size", fileSize));
            document.add(new LongPoint("size", fileSize));
            document.add(new TextField("content", fileContent, Field.Store.YES));
            document.add(new TextField("name", fileName, Field.Store.YES));
            document.add(new StoredField("path", filePath));
            indexWriter.addDocument(document);
        }
    }
}
创建索引库的流程:先为每个文件创建文档(Document)并添加各个域(Field),再由 IndexWriter 使用分析器(IKAnalyzer)对域内容进行分词,根据分出的关键词建立索引,最后把文档写入索引库。
根据关键词查询索引库
/**
 * Searches the index for documents whose {@code name} field contains the
 * term {@code welcome} and prints the total hit count followed by the
 * {@code name}, {@code path}, {@code size} and {@code content} values of
 * up to ten hits.
 *
 * @throws Exception if the index cannot be opened or read
 */
@Test
public void searchIndex() throws Exception {
    // Open the index location and a reader on it; try-with-resources closes
    // both even if the search or a document fetch throws.
    try (Directory directory = FSDirectory.open(new File("索引库位置").toPath());
         IndexReader indexReader = DirectoryReader.open(directory)) {
        // The searcher executes queries against the opened reader.
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        // Search condition: a single term in the "name" field. A TermQuery is
        // not run through an analyzer, so the text must match an indexed token.
        Query query = new TermQuery(new Term("name", "welcome"));
        TopDocs topDocs = indexSearcher.search(query, 10);
        System.out.println("总记录数" + topDocs.totalHits);

        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // scoreDoc.doc is the document id used to load the stored fields.
            Document document = indexSearcher.doc(scoreDoc.doc);
            System.out.println(document.get("name"));
            System.out.println(document.get("path"));
            System.out.println(document.get("size"));
            System.out.println(document.get("content"));
        }
    }
}
测试分析器
/**
 * Runs a sample Chinese paragraph through {@code IKAnalyzer} and prints
 * every token it produces, one per line.
 *
 * @throws Exception if tokenization fails
 */
@Test
public void testTokenStream() throws Exception {
    // Both the analyzer and the token stream are closed automatically, in
    // reverse order of declaration, even if tokenization throws.
    try (Analyzer analyzer = new IKAnalyzer();
         TokenStream tokenStream = analyzer.tokenStream("", "9月在新德里举行的美国与印度“2+2”会谈后,美国意欲实施其雄心勃勃的印度战略,然而,这一战略要取得成功,须解决三个关键问题。这是日前美国《华尔街日报》网站上刊发的《印度可以成为美国的重要伙伴——这个世界上人口最多的民主国家可以挫败中国谋求主宰地位的努力》的文章观点。")) {
        // The attribute object is reused; its content changes on every
        // incrementToken() call.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

        // TokenStream consumer workflow: reset -> incrementToken* -> end -> close.
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        tokenStream.end();
    }
}
用到的jar包
全文检索,其实就是先用分词工具把资源分解成关键词,再根据关键词建立索引(倒排索引),查询时通过关键词在索引中查找对应的文档;此外还有词条查询(TermQuery)等查询方式。以上都是基础入门的知识。