Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。
lucene的各版本下载地址:
http://mirror.bit.edu.cn/apache/lucene/java/
本文主要介绍lucene的简单使用。
luke的各个版本下载地址:https://github.com/DmitryKey/luke/releases?after=luke-7.2.0
1.项目结构
2. pom.xml的依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.wjx</groupId>
    <artifactId>lucene</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- Lucene version shared by every org.apache.lucene dependency below -->
    <properties>
        <lucene.version>6.6.5</lucene.version>
    </properties>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <!-- Lucene core: indexing and searching -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
        <!-- Query parsing -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
        <!-- Common analyzers: split text into terms -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-highlighter -->
        <!-- Highlighting of matched search terms -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- Chinese analyzer: SmartChineseAnalyzer -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-backward-codecs -->
        <!-- Codecs for older index formats; added to avoid errors caused by a Lucene version mismatch -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-backward-codecs</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.47</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Lucene 6.x requires Java 8 or newer, so the project must not target Java 7 -->
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
2.创建索引
Indexer.java
package com.wjx.index;
import com.wjx.util.FileUtil;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Builds a small demo Lucene index (three cities with a short description each)
 * using the smart Chinese analyzer.
 *
 * @author wjx
 * @since 2019/2/12 10:14
 */
public class Indexer {
    /** Sample data: element i of each array belongs to the same document. */
    private final int[] ids = {1, 2, 3};
    private final String[] cities = {"北京", "上海", "南京"};
    private final String[] des = {"北京是中国首都。", "上海市中国魔都。", "南京有玄武湖。"};

    /**
     * Creates an IndexWriter over the given directory.
     *
     * @param directory where the index is stored
     * @param analyzer  analyzer used to tokenize analyzed fields
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the index cannot be opened for writing
     */
    private IndexWriter getWriter(Directory directory, SmartChineseAnalyzer analyzer) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, config);
    }

    /**
     * Writes one document per sample entry into the index located at {@code path}.
     *
     * @param path file-system location of the index
     * @throws IOException if the index cannot be opened or written
     */
    private void createIndexer(String path) throws IOException {
        // try-with-resources closes the writer first, then the analyzer, then the
        // directory, even when adding a document fails part-way through.
        try (Directory directory = FSDirectory.open(Paths.get(path));
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             IndexWriter writer = getWriter(directory, analyzer)) {
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                // StringField: indexed as a single term, not tokenized.
                document.add(new StringField("id", String.valueOf(ids[i]), Field.Store.YES));
                document.add(new StringField("city", cities[i], Field.Store.YES));
                // TextField: tokenized by the analyzer for full-text search.
                document.add(new TextField("des", des[i], Field.Store.YES));
                writer.addDocument(document);
            }
        }
    }

    public static void main(String[] args) throws IOException {
        new Indexer().createIndexer(FileUtil.LUCENE_PATH);
        System.out.println("创建索引完成");
    }
}
3.进行搜索
Searcher.java
package com.wjx.index;
import com.wjx.util.FileUtil;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.StringReader;
import java.nio.file.Paths;
/**
 * Searches the demo Lucene index and prints each hit with the matched terms highlighted.
 *
 * @author wjx
 * @since 2019/2/12 10:43
 */
public class Searcher {
    /** Name of the field that is both searched and highlighted. */
    private static final String CONTENT_FIELD = "des";
    /** Maximum number of top-scoring hits to fetch. */
    private static final int MAX_HITS = 10;

    /**
     * Runs a query against the index and prints the highlighted results.
     *
     * @param indexPath file-system location of the index
     * @param words     query string, parsed against the {@code des} field
     * @throws Exception if the index cannot be read, the query cannot be parsed,
     *                   or highlighting fails
     */
    private void search(String indexPath, String words) throws Exception {
        // All three resources are released in reverse order when the block exits,
        // including on failure.
        try (Directory directory = FSDirectory.open(Paths.get(indexPath));
             IndexReader indexReader = DirectoryReader.open(directory);
             SmartChineseAnalyzer chineseAnalyzer = new SmartChineseAnalyzer()) {
            IndexSearcher is = new IndexSearcher(indexReader);
            // The Chinese analyzer is used here in place of the standard one.
            QueryParser parser = new QueryParser(CONTENT_FIELD, chineseAnalyzer);
            Query query = parser.parse(words);

            long start = System.currentTimeMillis();
            // Fetch only the MAX_HITS best-matching documents.
            TopDocs hits = is.search(query, MAX_HITS);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + words + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

            Highlighter highlighter = getHighlighter(query);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document document = is.doc(scoreDoc.doc);
                String des = document.get(CONTENT_FIELD);
                if (des == null) {
                    continue;
                }
                // Re-tokenize the stored text under the same field name that was searched.
                TokenStream tokenStream = chineseAnalyzer.tokenStream(CONTENT_FIELD, new StringReader(des));
                // Only the best-scoring fragment is returned, not the whole text.
                String fragment = highlighter.getBestFragment(tokenStream, des);
                // getBestFragment yields null when no fragment matched; print the plain text then.
                System.out.println(fragment != null ? fragment : des);
            }
        }
    }

    /**
     * Builds a highlighter for the given query. To change the markup, adjust the
     * prefix and suffix passed to {@code SimpleHTMLFormatter}.
     *
     * @param query the query whose terms should be highlighted
     * @return a highlighter that wraps matches in bold red markup
     */
    private Highlighter getHighlighter(Query query) {
        // Scores text fragments by the query terms they contain.
        QueryScorer queryScorer = new QueryScorer(query);
        // Splits the text into fragments based on the scorer.
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        // Markup placed before and after every highlighted term.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    public static void main(String[] args) throws Exception {
        new Searcher().search(FileUtil.LUCENE_PATH, "中国");
    }
}
4.删除修改
删除索引
/**
 * Deletes every document whose {@code id} term equals "1" from the index at
 * {@code FileUtil.LUCENE_PATH}.
 *
 * @param args unused
 * @throws Exception if the index cannot be opened or modified
 */
public static void main(String[] args) throws Exception {
    // try-with-resources releases the writer (and its index lock), the analyzer and
    // the directory even if the delete fails.
    try (Directory directory = FSDirectory.open(Paths.get(FileUtil.LUCENE_PATH));
         // Standard analyzer; swap in a Chinese analyzer implementation for Chinese text.
         Analyzer analyzer = new StandardAnalyzer();
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        writer.deleteDocuments(new Term("id", "1"));
        // deleteDocuments only marks documents as deleted; forceMergeDeletes merges the
        // affected segments so the deleted documents are physically removed as well.
        writer.forceMergeDeletes();
        writer.commit();
    }
}
修改索引
/**
 * Replaces the document whose {@code id} term equals "2" in the index at
 * {@code FileUtil.LUCENE_PATH} with a freshly built document.
 *
 * @param args unused
 * @throws Exception if the index cannot be opened or modified
 */
public static void main(String[] args) throws Exception {
    // try-with-resources releases the writer (and its index lock), the analyzer and
    // the directory even if the update fails.
    try (Directory directory = FSDirectory.open(Paths.get(FileUtil.LUCENE_PATH));
         // Standard analyzer; swap in a Chinese analyzer implementation for Chinese text.
         Analyzer analyzer = new StandardAnalyzer();
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        Document document = new Document();
        document.add(new TextField("id", "2", Field.Store.YES));
        document.add(new TextField("name", "my name is lzy", Field.Store.YES));
        document.add(new TextField("age", "26", Field.Store.YES));
        // StringField is indexed as one term and is not tokenized.
        document.add(new StringField("description", "my name is lzy", Field.Store.YES));
        // updateDocument deletes the documents matching the term and adds the new one,
        // so no separate deleteDocuments call is needed and no other document is touched.
        writer.updateDocument(new Term("id", "2"), document);
        writer.commit();
    }
}
4.一些辅助类
用户实体类 User.java
package com.wjx.pojo;
import lombok.Data;
/**
 * User entity holding an id, a name, an age and a description.
 * Annotated with Lombok's {@code @Data}.
 *
 * @author wjx
 * @since 2019/2/11 16:11
 */
@Data
public class User {
    private int id;
    private String name;
    private int age;
    private String description;

    /**
     * Creates a fully populated user.
     *
     * @param id          user id
     * @param name        user name
     * @param age         user age
     * @param description free-text description
     */
    public User(int id, String name, int age, String description) {
        this.description = description;
        this.age = age;
        this.name = name;
        this.id = id;
    }

    /** Creates a user with every field left at its default value. */
    public User() {
    }
}
文件操作辅助类 FileUtil.java
package com.wjx.util;
/**
 * File-system locations shared by the Lucene samples.
 *
 * @author wjx
 * @since 2019/2/12 09:58
 */
public class FileUtil {
    /** Directory path exposed as the search-file location. */
    public static final String SEARCH_FILE_PATH = "D:\\Program Files\\lucene\\data";

    /** Directory in which the Lucene index is stored. */
    public static final String LUCENE_PATH = "d:\\lucene";
}
数据库辅助类 DBUtil.java
package com.wjx.util;
import com.wjx.pojo.User;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Database helper that loads users from MySQL for indexing.
 *
 * @author wjx
 * @since 2019/2/11 15:43
 */
public class DBUtil {
    private static final String DRIVER = "com.mysql.jdbc.Driver";
    // NOTE(review): host and credentials are hard-coded in source; consider moving
    // them to external configuration before using this outside a demo.
    private static final String URL = "jdbc:mysql://114.116.24.32:3306/wjx?useUnicode=true&characterEncoding=UTF-8&useSSL=false";
    private static final String USER_NAME = "wjx";
    private static final String PASSWORD = "123";
    private static final String SELECT_ALL_USERS = "select * from user";

    /**
     * Opens a new database connection.
     *
     * @return an open connection; the caller must close it
     * @throws SQLException if the driver class is missing or the connection cannot be established
     */
    private static Connection getConnection() throws SQLException {
        try {
            Class.forName(DRIVER);
        } catch (ClassNotFoundException e) {
            // Surface a missing driver as a connection failure, keeping the cause.
            throw new SQLException("JDBC driver not found: " + DRIVER, e);
        }
        return DriverManager.getConnection(URL, USER_NAME, PASSWORD);
    }

    /**
     * Loads all rows of the {@code user} table.
     *
     * @return the users read so far; empty when the database cannot be reached,
     *         never {@code null}
     */
    public static List<User> getLuceneAll() {
        List<User> userList = new ArrayList<>();
        // Resources are closed in reverse order (result set, statement, connection),
        // each one even if closing another fails.
        try (Connection connection = getConnection();
             PreparedStatement statement = connection.prepareStatement(SELECT_ALL_USERS);
             ResultSet resultSet = statement.executeQuery()) {
            while (resultSet.next()) {
                // Columns 1-3 are read by position, so this depends on the table's
                // column order — presumably id, name, age; verify against the schema.
                int id = resultSet.getInt(1);
                String name = resultSet.getString(2);
                int age = resultSet.getInt(3);
                String description = resultSet.getString("description");
                userList.add(new User(id, name, age, description));
            }
        } catch (SQLException e) {
            // Best effort: report the failure and return whatever was read.
            e.printStackTrace();
        }
        return userList;
    }

    public static void main(String[] args) {
        List<User> luceneAll = getLuceneAll();
        System.out.println(luceneAll);
    }
}
5.把数据库数据提取至索引
package com.wjx.lucene;
import com.wjx.pojo.User;
import com.wjx.util.DBUtil;
import com.wjx.util.FileUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
/**
 * Indexes the users loaded from MySQL into the Lucene index at
 * {@code FileUtil.LUCENE_PATH}.
 *
 * @author wjx
 * @since 2019/2/11 11:08
 */
public class Mysql_Writer {
    public static void main(String[] args) throws IOException {
        // Load the rows first so the index write lock is not held during the query.
        List<User> userList = DBUtil.getLuceneAll();

        // try-with-resources releases the writer (and its index lock), the analyzer and
        // the directory even if adding a document fails.
        try (Directory directory = FSDirectory.open(Paths.get(FileUtil.LUCENE_PATH));
             // Standard analyzer; swap in a Chinese analyzer implementation for Chinese text.
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (User user : userList) {
                Document document = new Document();
                document.add(new TextField("id", String.valueOf(user.getId()), Field.Store.YES));
                // Lucene fields reject null values, so a column that is NULL in the
                // database is left out instead of aborting the whole run.
                if (user.getName() != null) {
                    document.add(new TextField("name", user.getName(), Field.Store.YES));
                }
                document.add(new TextField("age", String.valueOf(user.getAge()), Field.Store.YES));
                if (user.getDescription() != null) {
                    // StringField is indexed as one term and is not tokenized.
                    document.add(new StringField("description", user.getDescription(), Field.Store.YES));
                }
                writer.addDocument(document);
            }
        }
    }
}