--于2020.08.28从jar包更新为了maven依赖
什么是Lucene?
Lucene是apache下的一个开放源代码的全文检索引擎工具包,通过它可以实现全文检索。
什么是全文检索(Full-text Search)?
将非结构化数据(如文本)中的一部分信息提取出来,重新组织,使其变得有一定结构,然后对此有一定结构的数据进行搜索,从而达到搜索相对较快的目的。这部分从非结构化数据中提取出来、然后重新组织的信息,我们称之为索引。
而先建立好索引,再对索引进行搜索的过程就叫全文检索(Full-text Search)。
全文索引和搜索流程图:
maven依赖
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-memory</artifactId>
<version>4.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.10.0</version>
</dependency>
<!-- 中文分词器 -->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
接下来是lucene的增删改查
package com.y20.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;
import java.io.IOException;
public class IndexTest {
public static String PATH="D:\\luceneindex";
public static void main(String[] args) {
//1.新增后查询
//insert();
//select("晚宴");
//2.查一个不是索引的词条
//select("2017");
//3.修改索引后查询
//update("晚宴");
//select("扶贫");
//4.删除索引后查询
delete("扶贫");
select("扶贫");
}
public static void insert(){
try {
//1.设置索引存放目录
FSDirectory iDirectory=FSDirectory.open(new File(PATH));
//2.设置分词器IKAnalyzer中文分词工具包
Analyzer analyzer=new IKAnalyzer();
//根据lucene版本不同,IndexWriterConfig方法出现了变化,之前的写法为:
// IndexWriterConfig config=new IndexWriterConfig(analyzer); 不需要加版本号
IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_4_10_0,analyzer);
//3.创建索引写入器(索引存放处,分词器)
IndexWriter indexWriter=new IndexWriter(iDirectory, config);
//4.创建索引
Document document=new Document();
//5.创建一个属性
Field title = new TextField("title", "2017最具期待数字平台竞标晚宴”圆满落幕-搜狐新闻", Field.Store.YES);
Field keyWord = new TextField("key", "竞标 晚宴 ",Field.Store.YES);
Field content = new TextField("content", "10月18日,由金投赏主办、华扬联众独家承办的“2017最具期待数字平台竞标晚宴", Field.Store.YES);
Field url=new TextField("url","http://news.sohu.com/20161028/n471629875.shtml", Field.Store.YES);
//6.将属性添加到索引中
document.add(title);
document.add(keyWord);
document.add(content);
document.add(url);
//7.将索引写入到写入器中
indexWriter.addDocument(document);
//8.关闭写入流
indexWriter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void select(String indexName){
//1.设置索引存放的目录
try {
FSDirectory iDirectory=FSDirectory.open(new File(PATH));
//2.读取所设置目录中的索引
DirectoryReader reader=DirectoryReader.open(iDirectory);
//3.创建索引查询器
IndexSearcher searcher=new IndexSearcher(reader);
//4.构建一个语法解析器(参数(创建索引所用的关键词,分词器))
QueryParser parser=new QueryParser("key",new IKAnalyzer());
//5.解析目标字符串,通过解析这个可以得到你所需要的词条信息(匹配“竞标和key”)
Query query=parser.parse(indexName);
//6.执行查询,得到的数组(第二个参数为前多少条)
TopDocs docs=searcher.search(query, 50);
//7.取数据
ScoreDoc[] scoreDocs=docs.scoreDocs;
for(int i=0;i<scoreDocs.length;i++){
int id=scoreDocs[i].doc;
Document doc=searcher.doc(id);
String context=doc.get("content");
System.out.println("搜索到的内容是:"+context);
}
//8.关连接
reader.close();
} catch (Exception e) {
e.printStackTrace();
}
}
//修改的逻辑是将之前索引删除再创建一个新的
public static void update(String indexName){
try {
//1.设置索引存放目录
FSDirectory iDirectory=FSDirectory.open(new File(PATH));
//2.设置分词器IKAnalyzer中文分词工具包
Analyzer analyzer=new IKAnalyzer();
IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_4_10_0,analyzer);
//3.创建索引写入器(索引存放处,分词器)
IndexWriter indexWriter=new IndexWriter(iDirectory, config);
//4.创建索引
Document document=new Document();
//5.创建一个属性
Field title = new TextField("title", "“绿水青山就是金山银山”在浙江的探索和实践-搜狐新闻-已修改", Field.Store.YES);
Field keyWord = new TextField("key", "竞标 扶贫", Field.Store.YES);
Field content = new TextField("content", "这是浙江德清县筏头乡“裸心谷生态度假村”以低碳理念兴建的", Field.Store.YES);
Field url = new TextField("url", "http://news.sohu.com/20170526/n494527253.shtml", Field.Store.YES);
//6.将属性添加到索引中
document.add(title);
document.add(keyWord);
document.add(content);
document.add(url);
//7.创建语法解析器,解析目标字符串
QueryParser parser = new QueryParser("key",new IKAnalyzer());
Query query = parser.parse(indexName);
//8.将索引写入到写入器中,将之前的索引先删除再添加
indexWriter.deleteDocuments(query);
indexWriter.addDocument(document);
//9.关闭写入流
indexWriter.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static void delete(String indexName){
try {
//1.设置索引存放目录
FSDirectory iDirectory=FSDirectory.open(new File(PATH));
//2.设置分词器IKAnalyzer中文分词工具包
Analyzer analyzer=new IKAnalyzer();
IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_4_10_0,analyzer);
//3.创建索引写入器(索引存放处,分词器)
IndexWriter indexWriter=new IndexWriter(iDirectory, config);
//4.创建语法解析器,解析目标字符串
QueryParser parser = new QueryParser("key",new IKAnalyzer());
Query query = parser.parse(indexName);
//5.删除索引
indexWriter.deleteDocuments(query);
//6.关闭写入流
indexWriter.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
在insert之后,可以在指定文件夹中看到: