一.什么是lucene
Lucene是一个全文搜索框架,而不是应用产品。因此它并不像www.baidu.com或者Google Desktop那样拿来就能用,它只是提供了一套工具,让你能够自己实现这类产品。
下载地址 : http://lucene.apache.org/java
官网:http://lucene.apache.org/
主要应用领域:搜索引擎(百度,搜狗)、站内搜索(微博搜索)、电商网站(京东,淘宝)
全文检索和数据库like查询的区别
数据查询通常的做法是通过数据库模糊匹配,即 LIKE '%keyword%' 的方式。下面通过它与全文检索的对比,来分析数据库LIKE模糊查询和全文检索的区别。
Lucene与搜索引擎的区别
全文检索系统是按照全文检索理论建立起来的用于提供全文检索服务的软件系统。全文检索系统是一个可以运行的系统,包括建立索引、处理查询返回结果集、增加索引、优化索引结构等功能。例如:百度搜索、eclipse帮助搜索、淘宝网商品搜索。
搜索引擎是全文检索技术最主要的一个应用,例如百度。搜索引擎起源于传统的信息全文检索理论,即计算机程序通过扫描每一篇文章中的每一个词,建立以词为单位的倒排文件,检索程序根据检索词在每一篇文章中出现的频率和每一个检索词在一篇文章中出现的概率,对包含这些检索词的文章进行排序,最后输出排序的结果。全文检索技术是搜索引擎的核心支撑技术。
Lucene和搜索引擎不同:Lucene是一套用Java写的全文检索工具包,为应用程序提供了很多API接口去调用,可以简单理解为一套实现全文检索的类库;而搜索引擎是一个全文检索系统,是可以单独运行的软件。
lucene 分词例子
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Minimal Lucene demo: indexes two sample user documents with the IK analyzer
 * and then runs a query against the {@code userDesc} field.
 */
public class IndexDemo {
    /** File-system directory that holds the index. */
    static String dir = "E:\\index";

    /** Analyzer shared by indexing and query parsing. */
    static Analyzer analyzer = new IKAnalyzer();

    public static void main(String[] args) throws Exception {
        // Enable write() for one run to build the index before searching.
        //write();
        search();
    }

    /**
     * Builds the index by adding the two sample user documents.
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    public static void write() throws IOException {
        Directory directory = FSDirectory.open(new File(dir));
        try {
            // Bind the Lucene version and the analyzer to the writer.
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
            IndexWriter iwriter = new IndexWriter(directory, config);
            try {
                iwriter.addDocument(newUser("张三", "张三来自深圳喜欢看书"));
                iwriter.addDocument(newUser("李四", "李四来自兰州喜欢打篮球"));
                iwriter.commit();
            } finally {
                // Always close the writer so the index write lock is released.
                iwriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Searches the {@code userDesc} field and prints the {@code userName}
     * of each of the (at most 10) best hits.
     *
     * @throws IOException    if the index cannot be read
     * @throws ParseException if the query text cannot be parsed
     */
    public static void search() throws IOException, ParseException {
        Directory directory = FSDirectory.open(new File(dir));
        try {
            DirectoryReader ireader = DirectoryReader.open(directory);
            try {
                IndexSearcher isearcher = new IndexSearcher(ireader);
                // The parser fixes the default field and the analyzer used on the query text.
                QueryParser parser = new QueryParser(Version.LUCENE_47, "userDesc", analyzer);
                Query query = parser.parse("自兰");
                // Keep only the 10 highest-scoring documents.
                ScoreDoc[] hits = isearcher.search(query, null, 10).scoreDocs;
                for (ScoreDoc hit : hits) {
                    Document hitDoc = isearcher.doc(hit.doc);
                    System.out.println(hitDoc.get("userName"));
                }
            } finally {
                ireader.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Creates a document with stored, analyzed {@code userName} and {@code userDesc} fields. */
    private static Document newUser(String userName, String userDesc) {
        Document doc = new Document();
        doc.add(new Field("userName", userName, TextField.TYPE_STORED));
        doc.add(new Field("userDesc", userDesc, TextField.TYPE_STORED));
        return doc;
    }
}
通过lucene实现数据库的全文搜索例子
/**
 * Web endpoints of the food search example; every request is handed
 * straight to {@link FoodService}.
 */
@Controller
public class FoodController {

    @Autowired
    FoodService service;

    /**
     * Handles {@code /queryFood}: looks up the given food name through the service.
     *
     * @param foodname the text to search for
     * @return the list returned by {@code service.search}, written to the response body
     */
    @RequestMapping("/queryFood")
    @ResponseBody
    public List<String> serch(String foodname) {
        List<String> matches = service.search(foodname);
        return matches;
    }

    /**
     * Handles {@code /loadData}: asks the service to write the index.
     */
    @RequestMapping("/loadData")
    @ResponseBody
    public void write() {
        service.write();
    }
}
dao层
import java.util.List;
import java.util.Map;
/**
 * Data-access contract of the food search example.
 */
public interface FoodDao {

    /**
     * Fetches the data stored in the database.
     *
     * @return one map per row
     */
    List<Map<String, Object>> getFood();

    /**
     * Creates the index.
     */
    void write();

    /**
     * Searches the index.
     *
     * @param foodname the text to search for
     * @return the search results
     */
    List<String> search(String foodname);
}
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.et.dao.FoodDao;
@Repository
public class FoodDaoImpl implements FoodDao {
static String dir="E:\\index";
//定义分词器
static Analyzer analyzer = new IKAnalyzer();
@Autowired
JdbcTemplate jdbc;
public List<Map<String, Object>> getFood() {
String sql="select * from food";
List<Map<String, Object>> result=jdbc.queryForList(sql);
return result;
}
public void write() {
//索引库的存储目录
Directory directory;
try {
directory = FSDirectory.open(new File(dir));
//关联lucene版本和当前分词器
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
//创建索引 (传入写入的目录和分词器)
IndexWriter iwriter = new IndexWriter(directory, config);
//获取数据库中的数据
List<Map<String, Object>> list= getFood();
//将数据库中的数据写入document对象中
for (Map<String, Object> map : list) {
//document对象field属性
Document doc=new Document();
Field field=new Field("foodname",map.get("foodname").toString(),TextField.TYPE_STORED);
doc.add(field);
iwriter.addDocument(doc);
}
//提交事务
iwriter.commit();
iwriter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public List<String> search(String foodname) {
List<String> ls=new ArrayList<String>();
//索引库的存储目录
Directory directory;
try {
directory = FSDirectory.open(new File(dir));
//读取索引库的存储目录
DirectoryReader ireader = DirectoryReader.open(directory);
//搜索类
IndexSearcher isearcher = new IndexSearcher(ireader);
//lucene的查询解析器 用于指定查询的属性名和分词器
QueryParser parser = new QueryParser(Version.LUCENE_47, "foodname", analyzer);
//开始搜索
Query query = parser.parse(foodname);
//用来存储等分高的document 获取搜索的结果 指定返回的document个数
ScoreDoc[] hits = isearcher.search(query, null, 10).scoreDocs;
//从ScoreDoc数组中获取单独的document
for (int i = 0; i < hits.length; i++) {
Document hitDoc = isearcher.doc(hits[i].doc);
ls.add(hitDoc.getField("foodname").stringValue());
//ik分词器的方式
StringReader sr=new StringReader(hitDoc.getField("foodname").stringValue());
IKSegmenter ik=new IKSegmenter(sr, false); //true代表调用IKSegmenter()构造函数时使用智能分词
Lexeme lex=null;
while((lex=ik.next())!=null){
System.out.print(lex.getLexemeText()+"|");
}
}
ireader.close();
directory.close();
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
}
return ls;
}
}
entity层
/**
 * Plain mutable bean describing one food item.
 */
public class Food {

    private int foodid;
    private String foodname;
    private int price;
    private String imagepath;

    /** Creates a food with all fields at their default values. */
    public Food() {
    }

    /**
     * Creates a fully populated food.
     *
     * @param foodid    identifier of the food
     * @param foodname  name of the food
     * @param price     price of the food
     * @param imagepath path of the food's image
     */
    public Food(int foodid, String foodname, int price, String imagepath) {
        this.foodid = foodid;
        this.foodname = foodname;
        this.price = price;
        this.imagepath = imagepath;
    }

    /** @return the identifier */
    public int getFoodid() {
        return this.foodid;
    }

    /** @param foodid the new identifier */
    public void setFoodid(int foodid) {
        this.foodid = foodid;
    }

    /** @return the name */
    public String getFoodname() {
        return this.foodname;
    }

    /** @param foodname the new name */
    public void setFoodname(String foodname) {
        this.foodname = foodname;
    }

    /** @return the price */
    public int getPrice() {
        return this.price;
    }

    /** @param price the new price */
    public void setPrice(int price) {
        this.price = price;
    }

    /** @return the image path */
    public String getImagepath() {
        return this.imagepath;
    }

    /** @param imagepath the new image path */
    public void setImagepath(String imagepath) {
        this.imagepath = imagepath;
    }
}
service层
import java.util.List;
import java.util.Map;
/**
 * Service contract of the food search example.
 */
public interface FoodService {

    /**
     * Fetches the data stored in the database.
     *
     * @return one map per row
     */
    List<Map<String, Object>> getFood();

    /**
     * Creates the index.
     */
    void write();

    /**
     * Searches the index.
     *
     * @param foodname the text to search for
     * @return the search results
     */
    List<String> search(String foodname);
}
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.et.dao.FoodDao;
import com.et.service.FoodService;
/**
 * {@link FoodService} implementation that forwards every call to the injected {@link FoodDao}.
 */
@Service
public class FoodServiceImpl implements FoodService {

    @Autowired
    FoodDao dao;

    /** Returns whatever {@code dao.getFood()} returns. */
    @Override
    public List<Map<String, Object>> getFood() {
        List<Map<String, Object>> rows = dao.getFood();
        return rows;
    }

    /** Forwards to {@code dao.write()}. */
    @Override
    public void write() {
        dao.write();
    }

    /** Returns whatever {@code dao.search(foodname)} returns. */
    @Override
    public List<String> search(String foodname) {
        List<String> matches = dao.search(foodname);
        return matches;
    }
}
运行主类
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Application entry point.
 */
@SpringBootApplication
public class Main {

    /**
     * Starts the Spring Boot application with this class as its source.
     *
     * @param args command-line arguments handed to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Starting the application loads the auto-configuration.
        final Class<Main> source = Main.class;
        SpringApplication.run(source, args);
    }
}
application.properties
spring.datasource.url=jdbc:mysql://localhost/food
spring.datasource.username=root
spring.datasource.password=123456
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.et</groupId>
<artifactId>Lucene</artifactId>
<version>0.0.1-SNAPSHOT</version>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>1.5.9.RELEASE</version>
</parent>
<dependencies>
<!--IK分词器的配置-->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
<!--
springboot每一个框架的集成都是一个starter
spring-boot-starter-web加载javaee 内嵌tomcat
-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<!-- 可直接访问jsp的配置 使用jsp添加的依赖 -->
<groupId>org.apache.tomcat.embed</groupId>
<artifactId>tomcat-embed-jasper</artifactId>
</dependency>
<!-- 使用连接数据的starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<!-- 应用freemarker的starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-freemarker</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.35</version>
</dependency>
<!-- druid的配置 -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.1.5</version>
</dependency>
<!-- 集成mybatis的配置 -->
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>1.3.1</version>
</dependency>
<!-- 用于监控 项目是否安全 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!--开发工具 添加启动配置 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<optional>true</optional>
</dependency>
</dependencies>
</project>
使用lucene对数据库进行全文搜索:前面的例子一次性把全部数据读入内存,数据量大时会导致内存溢出,下面改用分批建立索引,并加入搜索结果高亮。
添加maven依赖在pom.xml
<!-- 高亮的配置-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.7.2</version>
</dependency>
controller层
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import com.et.service.FoodService;
import com.et.util.LuceneUtil;
/**
 * REST endpoints for searching the food index and for (re)building it
 * from the database in batches.
 */
@RestController
public class FoodController {

    /** Number of rows fetched from the database per batch. */
    private static final int BATCH_SIZE = 1000;

    @Autowired
    private FoodService service;

    /**
     * Handles {@code /searchFood}: searches the {@code foodname} field.
     *
     * @param keyWord the text to search for
     * @return the list returned by {@code LuceneUtil.search}
     * @throws Exception whatever {@code LuceneUtil.search} throws
     */
    @GetMapping("/searchFood")
    public List<Map> getFood(String keyWord) throws Exception {
        return LuceneUtil.search("foodname", keyWord);
    }

    /**
     * Handles {@code /createIndex}: reads the food rows page by page and
     * writes one index document per row.
     *
     * @return {@code "1"} on success, {@code "0"} if an {@link IOException} occurred
     */
    @GetMapping("/createIndex")
    public String createIndex() {
        try {
            int queryFoodCount = service.queryFoodCount();
            // The offset is zero-based, so consecutive pages start at 0, 1000, 2000, ...
            // Advancing by exactly BATCH_SIZE avoids skipping the row at each page boundary.
            for (int startIndex = 0; startIndex < queryFoodCount; startIndex += BATCH_SIZE) {
                List<Map<String, Object>> queryFood = service.queryFood(startIndex, BATCH_SIZE);
                for (Map<String, Object> mso : queryFood) {
                    LuceneUtil.write(toDocument(mso));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            return "0";
        }
        return "1";
    }

    /** Builds the index document for one database row. */
    private static Document toDocument(Map<String, Object> row) {
        Document doc = new Document();
        doc.add(new Field("foodid", text(row.get("foodid")), TextField.TYPE_STORED));
        doc.add(new Field("foodname", text(row.get("foodname")), TextField.TYPE_STORED));
        doc.add(new Field("price", text(row.get("price")), TextField.TYPE_STORED));
        doc.add(new Field("imagepath", text(row.get("imagepath")), TextField.TYPE_STORED));
        return doc;
    }

    /** Returns the string form of a column value, or an empty string when the value is null. */
    private static String text(Object value) {
        return value == null ? "" : value.toString();
    }
}
dao层
import java.util.List;
import java.util.Map;
/**
 * Data-access contract for reading food rows page by page.
 */
public interface FoodDao {

    /**
     * Fetches one page of data.
     *
     * @param start the start position of the page
     * @param rows  the number of rows requested
     * @return one map per row
     */
    List<Map<String, Object>> queryFood(int start, int rows);

    /**
     * Gets the total number of rows.
     *
     * @return the row count
     */
    int queryFoodCount();
}
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
import com.et.dao.FoodDao;
/**
 * {@link FoodDao} implementation that pages through the {@code food} table with {@link JdbcTemplate}.
 */
@Repository
public class FoodDaoImpl implements FoodDao {

    @Autowired
    private JdbcTemplate jdbc;

    /**
     * Fetches one page of data.
     *
     * @param start zero-based offset of the first row
     * @param rows  maximum number of rows to return
     * @return one map per row
     */
    @Override
    public List<Map<String, Object>> queryFood(int start, int rows) {
        // Bound parameters instead of string concatenation; the ORDER BY keeps
        // page boundaries stable between successive calls.
        String sql = "select * from food order by foodid limit ?,?";
        return jdbc.queryForList(sql, start, rows);
    }

    /**
     * Gets the total number of rows.
     *
     * @return the row count of the {@code food} table
     */
    @Override
    public int queryFoodCount() {
        String sql = "select count(*) as foodCount from food";
        Integer count = jdbc.queryForObject(sql, Integer.class);
        return count == null ? 0 : count.intValue();
    }
}
service层
import java.util.List;
import java.util.Map;
/**
 * Service contract for reading food rows page by page.
 */
public interface FoodService {

    /**
     * Fetches one page of data.
     *
     * @param start the start position of the page
     * @param rows  the number of rows requested
     * @return one map per row
     */
    List<Map<String, Object>> queryFood(int start, int rows);

    /**
     * Gets the total number of rows.
     *
     * @return the row count
     */
    int queryFoodCount();
}
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.et.dao.FoodDao;
import com.et.service.FoodService;
/**
 * {@link FoodService} implementation that forwards every call to the injected {@link FoodDao}.
 */
@Service
public class FoodServiceImpl implements FoodService {

    @Autowired
    FoodDao dao;

    /**
     * Fetches one page of data by forwarding to {@code dao.queryFood}.
     */
    @Override
    public List<Map<String, Object>> queryFood(int start, int rows) {
        List<Map<String, Object>> page = dao.queryFood(start, rows);
        return page;
    }

    /**
     * Gets the total number of rows by forwarding to {@code dao.queryFoodCount}.
     */
    @Override
    public int queryFoodCount() {
        int total = dao.queryFoodCount();
        return total;
    }
}
util工具层
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Static helpers for writing documents to, and searching, the Lucene food index.
 */
public class LuceneUtil {
    /** File-system directory that holds the index. */
    static String dir = "E:\\index";

    /** Analyzer shared by indexing, query parsing and highlighting. */
    static Analyzer analyzer = new IKAnalyzer();

    /**
     * Adds one document to the index and commits it.
     *
     * @param doc the document to index
     * @throws IOException if the index cannot be opened or written
     */
    public static void write(Document doc) throws IOException {
        Directory directory = FSDirectory.open(new File(dir));
        try {
            // Bind the Lucene version and the analyzer to the writer.
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
            IndexWriter iwriter = new IndexWriter(directory, config);
            try {
                iwriter.addDocument(doc);
                iwriter.commit();
            } finally {
                // Always close the writer so the index write lock is released.
                iwriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Searches the index and returns the (at most 10) best hits. Each hit is a
     * map with the keys {@code foodid}, {@code foodname}, {@code price} and
     * {@code imagepath}; the query terms inside {@code foodname} are wrapped
     * in {@code <font color=red>} tags.
     *
     * @param field the default field for the query; null or blank means {@code foodname}
     * @param value the query text; null or blank yields an empty list
     * @return one map per hit, never null
     * @throws Exception if the index cannot be read, the query cannot be parsed
     *                   or highlighting fails
     */
    public static List<Map> search(String field, String value) throws Exception {
        List<Map> list = new ArrayList<Map>();
        if (value == null || value.trim().length() == 0) {
            // Nothing to search for.
            return list;
        }
        String searchField = (field == null || field.trim().length() == 0) ? "foodname" : field;

        Directory directory = FSDirectory.open(new File(dir));
        try {
            DirectoryReader ireader = DirectoryReader.open(directory);
            try {
                IndexSearcher isearcher = new IndexSearcher(ireader);
                // The parser fixes the default field and the analyzer used on the query text.
                QueryParser parser = new QueryParser(Version.LUCENE_47, searchField, analyzer);
                Query query = parser.parse(value);
                // The formatter supplies the prefix and suffix put around each matched term.
                SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
                Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                // Keep only the 10 highest-scoring documents.
                ScoreDoc[] hits = isearcher.search(query, null, 10).scoreDocs;
                for (ScoreDoc hit : hits) {
                    Document hitDoc = isearcher.doc(hit.doc);
                    Map<String, String> map = new HashMap<String, String>();
                    map.put("foodid", hitDoc.get("foodid"));
                    map.put("foodname", highlight(highlighter, isearcher, hit.doc, hitDoc.get("foodname")));
                    map.put("price", hitDoc.get("price"));
                    map.put("imagepath", hitDoc.get("imagepath"));
                    list.add(map);
                }
            } finally {
                ireader.close();
            }
        } finally {
            directory.close();
        }
        return list;
    }

    /**
     * Returns {@code foodname} with the query terms highlighted. Falls back to
     * the plain name when no fragment scores above zero, and to an empty
     * string when the document has no name.
     */
    private static String highlight(Highlighter highlighter, IndexSearcher isearcher,
            int docId, String foodname) throws Exception {
        if (foodname == null) {
            return "";
        }
        TokenStream tokenStream =
                TokenSources.getAnyTokenStream(isearcher.getIndexReader(), docId, "foodname", analyzer);
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, foodname, false, 10);
        String highlighted = foodname;
        for (int j = 0; j < frag.length; j++) {
            if (frag[j] != null && frag[j].getScore() > 0) {
                highlighted = frag[j].toString();
            }
        }
        return highlighted;
    }
}
运行主类
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Application entry point.
 */
@SpringBootApplication
public class Main {

    /**
     * Starts the Spring Boot application with this class as its source.
     *
     * @param args command-line arguments handed to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Starting the application loads the auto-configuration.
        final Class<Main> source = Main.class;
        SpringApplication.run(source, args);
    }
}
serach.html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<script type="text/javascript" src="jquery-3.1.0.min.js"></script>
<script type="text/javascript">
// On DOM ready, wire the search button: query /searchFood with the typed
// keyword and render one <div name='c'> per returned food.
$(function(){
    $("#serachButton").click(function(){
        $.ajax({
            url:'searchFood',
            // Pass an object so jQuery URL-encodes the keyword; a hand-built
            // query string breaks on characters such as & = + %.
            data:{keyWord:$("input[name='keyWord']").val()},
            dataType:'json',
            success:function(jd){
                // Drop the results of the previous search.
                $("div[name='c']").remove();
                for(var i=0;i<jd.length;i++){
                    // foodname arrives with the server's highlight markup, so it is inserted as HTML.
                    var html="<div name='c'>"+
                        "<h3>"+jd[i].foodname+"</h3>"+
                        "<span>"+jd[i].foodname+"价格是:"+jd[i].price+"</span>"+
                        "<hr>"+
                        "</div> ";
                    $("#foodDiv").append(html);
                }
            }
        });
    });
});
</script>
</head>
<body>
<div>
<img src="1.png"><br/>
<input type="text" name="keyWord" style="border:1px solid #D3D3D3;width: 200px;height: 25px">
<input id="serachButton" type="button" value="开始搜索" style="background-color: #3D8BFF;color: white; border: 0px;height: 27px" />
<div id="foodDiv"></div>
</div>
</body>
</html>