给大家简单介绍使用lucene这个搜索引擎,这里我只讲使用,如果想知道它的原理或者更深层次的东西可以看看别的博客
* 1.构建Lucene索引
首先贴上maven的pom.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>Maven</artifactId>
<groupId>com.hw</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>lucene</artifactId>
<packaging>war</packaging>
<name>lucene Maven Webapp</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- jdbc驱动包 -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.44</version>
</dependency>
<!-- 添加Httpclient支持 -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<!-- 添加jsoup支持 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.1</version>
</dependency>
<!-- 添加日志支持 -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
</dependency>
<!-- 添加ehcache支持 -->
<dependency>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache</artifactId>
<version>2.10.3</version>
</dependency>
<!-- 添加commons io支持 -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
<dependency>
<groupId>org.apache.struts</groupId>
<artifactId>struts2-core</artifactId>
<version>2.5.16</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>4.0.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>jstl</groupId>
<artifactId>jstl</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>taglibs</groupId>
<artifactId>standard</artifactId>
<version>1.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.tomcat/tomcat-jsp-api -->
<dependency>
<groupId>org.apache.tomcat</groupId>
<artifactId>tomcat-jsp-api</artifactId>
<version>8.5.31</version>
</dependency>
</dependencies>
<build>
<finalName>lucene</finalName>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_war_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
</plugin>
<plugin>
<artifactId>maven-war-plugin</artifactId>
<version>3.2.0</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
数据库信息和存储位置
然后是生成文件的代码
public class IndexStarter {
// dao方法
private static BlogDao blogDao = new BlogDao();
//执行生成索引文件
public static void main(String[] args) {
//SmartChineseAnalyzer 中文分词器
IndexWriterConfig config = new IndexWriterConfig(new SmartChineseAnalyzer());
//索引文件生成位置
Directory d = null;
IndexWriter writer = null;
try {
//位置
d = FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath")));
writer = new IndexWriter(d, config);
//为数据库中的所有所有文件构建索引
List<Map<String, Object>> list = blogDao.list(null, null);
Document doc = null;
for (Map<String, Object> map : list) {
doc = new Document();
doc.add(new StringField("id", (String) map.get("id"), Field.Store.YES));
//textfield为将该数据作为分词查询
doc.add(new TextField("title", (String) map.get("title"), Field.Store.YES));
doc.add(new StringField("url", (String) map.get("url"), Field.Store.YES));
writer.addDocument(doc);
}
} catch (IOException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
} catch (InstantiationException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
} finally {
//关闭流
if (writer != null) {
try {
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}
* 2.读取索引文件,获取命中片段,使得命中片段高亮显示
action
public String execute() {
try {
HttpServletRequest request = ServletActionContext.getRequest();
if (StringUtils.isBlank(title)) {
List<Map<String, Object>> blogList = this.blogDao.list(title, null);
request.setAttribute("blogList", blogList);
} else {
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
IndexReader indexReader = DirectoryReader.open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
IndexSearcher searcher = new IndexSearcher(indexReader);
//拿一句话到目录中的索引文件词库进行关键字碰撞
Query query = new QueryParser("title", analyzer).parse(title);
TopDocs docs = searcher.search(query, 100);
//关键字高亮
//将碰撞出来的关键字点亮
QueryScorer queryScorer = new QueryScorer(query);
//以什么实行点亮关键字
Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</span><b>");
Highlighter highlighter = new Highlighter(formatter, queryScorer);
List<Map<String, Object>> blogList = new ArrayList<>();
Map<String, Object> map = null;
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
map = new HashMap<>();
//拿到文档
Document doc = searcher.doc(scoreDoc.doc);
map.put("id", doc.get("id"));
String title = doc.get("title");
if (StringUtils.isNotBlank(title)) {
title = highlighter.getBestFragment(analyzer, "title", title);
}
map.put("title", title);
map.put("url", doc.get("url"));
blogList.add(map);
}
request.setAttribute("blogList", blogList);
}
} catch (Exception e) {
e.printStackTrace();
}
return "blogList";
}
前台代码:
<body>
<form action="${pageContext.request.contextPath}/sy/blogAction.action"
method="post">
博客标题:<input type="text" name="title"> <input type="submit"
value="确定">
</form>
<table border="1" width="100%">
<tr>
<td>编号</td>
<td>名称</td>
<td>价格</td>
</tr>
<c:forEach items="${blogList }" var="blog">
<tr>
<td>${blog.id }</td>
<td>${blog.title }</td>
<td><a href="${blog.url }">${blog.title }</a></td>
</tr>
</c:forEach>
</table>
</body>
效果图:
能将一句话分词搜索