--- 2010-06-18 17:00 代码修改: 修改重复创建索引,添加双字段高亮。
今天发一个简单的、与数据库交互的 Lucene 示例。这里只是初步依靠 Lucene 自带的分析器来实现中文分词,效果肯定无法与网上琳琅满目的分词器相媲美;只是为了示例,高亮也用了自带的实现。页面不好看,将就一下哦。
主要是由 Spring + Struts1 + Mysql 5 实现, 只是为了实现功能。请各位大侠拍砖。
好了,不多说了。贴代码:
1. T_ARTICLE 表
-- Article table used by the demo. Running this script drops any existing
-- t_article table (and its rows) before recreating it.
DROP TABLE IF EXISTS `t_article`;
CREATE TABLE `t_article` (
-- Primary key, up to 32 characters; defaults to the empty string.
`ID` varchar(32) NOT NULL default '',
`ARTICLE_TITLE` varchar(255) default NULL,
`ARTICLE_TAG` varchar(255) default NULL,
-- Article body; TEXT so that long pasted content fits.
`ARTICLE_CONTENT` text,
PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
上面的数据 随便加一下吧,主要是article_content和article_title字段的数据,article_content是个text字段,所以你可以粘贴网页的文字内容添加到本字段中。
2.业务接口 IArticleService.java
package com.jushi.lucene.business;
import java.util.List;
import com.jushi.lucene.entities.Article;
/**
 * Business-layer contract for the article search demo: building the search
 * index and querying it.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:24:42 PM
 * @Version V 1.0
 */
public interface IArticleService {

    /**
     * Searches the index for articles matching the given keyword expression.
     *
     * @param query the user-supplied search text
     * @return the matching articles
     */
    List<Article> getArticles(String query);

    /**
     * Makes sure a search index is available, building it if necessary.
     *
     * @return {@code true} when an index is available, {@code false} when
     *         building it failed
     */
    boolean createIndex();
}
3. 接口的实现 ArticleServiceImpl.java 主要的业务在这里实现:创建索引,搜索数据处理。
package com.jushi.lucene.business;
import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.jushi.lucene.dao.IArticleDAO;
import com.jushi.lucene.entities.Article;
/**
 * Lucene-backed implementation of {@link IArticleService}.
 *
 * <p>Builds a file-system index under {@code INDEXPATH} from the articles
 * returned by the DAO, and answers keyword searches over the {@code title}
 * and {@code content} fields with HTML-highlighted fragments.</p>
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:25:00 PM
 * @Version V 1.0
 */
public class ArticleServiceImpl implements IArticleService {

    /** Directory on disk that holds the Lucene index. */
    private static final String INDEXPATH = "g:\\index";

    /** Indexed fields a search query is matched against. */
    private static final String[] SEARCH_FIELDS = { "title", "content" };

    /** Maximum number of hits fetched for one search. */
    private static final int MAX_HITS = 1000;

    /** Fragment size handed to the highlighter's {@link SimpleFragmenter}. */
    private static final int FRAGMENT_SIZE = 100;

    private IArticleDAO articleDAO;

    /** Shared by indexing, query parsing and highlighting so all three tokenize alike. */
    private final Analyzer analyzer = new StandardAnalyzer();

    /**
     * Searches the {@code title} and {@code content} fields for the given query
     * and returns the hits with matching terms wrapped in highlight markup.
     *
     * @param query the user-supplied search text; {@code null} or blank yields no results
     * @return the matching articles, each carrying the total hit count; an empty
     *         list (never {@code null}) when nothing matches or the search fails
     */
    public List<Article> getArticles(String query) {
        List<Article> qlist = new ArrayList<Article>();
        // A blank query cannot match anything useful; skip opening the index at all.
        if (query == null || query.trim().length() == 0) {
            return qlist;
        }

        IndexSearcher indexSearcher = null;
        try {
            indexSearcher = new IndexSearcher(INDEXPATH);
            System.out.println(">>> 2.开始读取索引... ... 通过关键字:【 "+ query +" 】");
            long begin = System.currentTimeMillis();

            // A document matches when either the title or the content matches.
            BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
            Query queryOBJ = MultiFieldQueryParser.parse(query, SEARCH_FIELDS, clauses, analyzer);

            // No filter is applied; the typed null keeps the search(...) overload unambiguous.
            Filter filter = null;
            TopDocs topDocs = indexSearcher.search(queryOBJ, filter, MAX_HITS);
            System.out.println("*** 共匹配:" + topDocs.totalHits + "个 ***");

            // The highlighter depends only on the query, so build it once per search.
            SimpleHTMLFormatter simpleHTMLFormatter =
                    new SimpleHTMLFormatter("<font color='red'><strong>", "</strong></font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(queryOBJ));
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document targetDoc = indexSearcher.doc(scoreDoc.doc);
                Article article = new Article();
                article.setTitle(highlight(highlighter, "title", targetDoc.get("title")));
                article.setContent(highlight(highlighter, "content", targetDoc.get("content")));
                article.setTag(targetDoc.get("tag"));
                article.setTotalHits(topDocs.totalHits);
                qlist.add(article);
            }

            long end = System.currentTimeMillis();
            System.out.println(">>> 3.搜索完毕... ... 共花费:" + (end - begin) +"毫秒...");
            return qlist;
        } catch (Exception e) {
            e.printStackTrace();
            // Report failure as "no results" so callers and views need no null check.
            return new ArrayList<Article>();
        } finally {
            // Always release the index files, including when the search failed midway.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the best highlighted fragment of {@code text} for the given field,
     * or {@code text} itself when no query term occurs in it.
     */
    private String highlight(Highlighter highlighter, String fieldName, String text) throws Exception {
        if (text == null) {
            return null;
        }
        TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokenStream, text);
        return fragment == null ? text : fragment;
    }

    /**
     * Builds the index from all articles returned by the DAO unless the index
     * directory already contains files.
     *
     * @return {@code true} when an index already exists or was built successfully,
     *         {@code false} when building it failed
     */
    public boolean createIndex() {
        if (isIndexExisted()) {
            return true;
        }

        List<Article> list = articleDAO.getArticles();
        IndexWriter indexWriter = null;
        try {
            Directory directory = FSDirectory.getDirectory(INDEXPATH);
            indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            long begin = System.currentTimeMillis();
            for (Article art : list) {
                Document doc = new Document();
                String title = art.getTitle() == null ? "" : art.getTitle().trim();
                String content = art.getContent() == null ? "" : art.getContent();
                String tag = art.getTag() == null ? "" : art.getTag();
                doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
                doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
                // The tag is only displayed, never searched, so it is stored but not indexed.
                doc.add(new Field("tag", tag, Field.Store.COMPRESS, Field.Index.NO));
                indexWriter.addDocument(doc);
            }
            long end = System.currentTimeMillis();
            System.out.println(">>> 1.存入索引完毕.. 共花费:" + (end - begin) +"毫秒...");
            indexWriter.optimize();
            indexWriter.close();
            indexWriter = null; // closed normally; nothing left for the finally block
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // On failure the writer is still open; close it so the index lock is released.
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Checks whether the index directory exists and contains at least one file.
     *
     * @return {@code true} if index files are present, {@code false} otherwise
     */
    private boolean isIndexExisted() {
        try {
            // listFiles() returns null when the path is missing or is not a directory.
            File[] files = new File(INDEXPATH).listFiles();
            return files != null && files.length > 0;
        } catch (SecurityException e) {
            e.printStackTrace();
            return false;
        }
    }

    public void setArticleDAO(IArticleDAO articleDAO) {
        this.articleDAO = articleDAO;
    }
}
4. DAO 接口及 DAO实现
/**
 * Data-access contract for reading articles from the database.
 */
public interface IArticleDAO {

    /**
     * Loads the articles to be indexed.
     *
     * @return the articles read from the database
     */
    List<Article> getArticles();
}
/**
 * JDBC implementation of {@link IArticleDAO} built on Spring's JdbcDaoSupport.
 */
public class ArticleDAOImpl extends JdbcDaoSupport implements IArticleDAO {

    /**
     * Reads the title, tag and content columns of every row in t_article and
     * maps each row with {@link ArticleRowMapper}.
     *
     * @return the mapped articles
     */
    public List<Article> getArticles() {
        final String selectAll = "SELECT article_title, article_tag, article_content FROM t_article";
        final List<?> rows = getJdbcTemplate().query(selectAll, new ArticleRowMapper());
        // The row mapper only ever produces Article instances.
        return (List<Article>) rows;
    }
}
5. 上面DAO需要的 ArticleRowMapper: 这里做了下小改动
package com.jushi.lucene.rowmapper;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import org.springframework.jdbc.core.RowMapper;
import com.jushi.lucene.entities.Article;
/**
 * Maps one result-set row to an {@link Article}, driven by the column names
 * found in the result-set metadata rather than by fixed column positions.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:31:54 PM
 * @Version V 1.0
 */
public class ArticleRowMapper implements RowMapper {

    /**
     * Builds an {@link Article} from the current row. Columns other than
     * article_title, article_content and article_tag are ignored.
     *
     * @param rs the result set positioned on the row to map
     * @param rowNum index of the current row (unused)
     * @return the populated article
     * @throws SQLException if reading metadata or a column value fails
     */
    public Object mapRow(ResultSet rs, int rowNum) throws SQLException {
        final Article mapped = new Article();
        final ResultSetMetaData metaData = rs.getMetaData();
        final int columnCount = metaData.getColumnCount();
        // JDBC columns are 1-based.
        for (int column = 1; column <= columnCount; column++) {
            final String name = metaData.getColumnName(column).toLowerCase();
            copyColumn(rs, column, name, mapped);
        }
        return mapped;
    }

    /** Copies the value of one recognised column into the matching article property. */
    private static void copyColumn(ResultSet rs, int column, String name, Article target)
            throws SQLException {
        if (name.equals("article_title")) {
            target.setTitle(rs.getString(column));
            return;
        }
        if (name.equals("article_content")) {
            target.setContent(rs.getString(column));
            return;
        }
        if (name.equals("article_tag")) {
            target.setTag(rs.getString(column));
        }
    }
}
6. Article 实体 添加了两个属性: TotalHits, TotalTime
package com.jushi.lucene.entities;
/**
* @Author jushi
* @CreateDate Jun 4, 2010 1:18:48 PM
* @Version V 1.0
*/
public class Article {
private String title;
private String content;
private String tag;
private int TotalHits; //命中率
private int TotalTime; //花费时间
public int getTotalTime() {
return TotalTime;
}
public void setTotalTime(int totalTime) {
TotalTime = totalTime;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getTag() {
return tag;
}
public void setTag(String tag) {
this.tag = tag;
}
public int getTotalHits() {
return TotalHits;
}
public void setTotalHits(int totalHits) {
TotalHits = totalHits;
}
}
7. Spring 的配置如下:
--- applicationContext-lucene.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<beans>
<!-- DAO bean; its dataSource property refers to the bean named articleDataSource,
     which is declared in applicationContext-lucene-datasource.xml. -->
<bean id="articleDAO" class="com.jushi.lucene.dao.ArticleDAOImpl" scope="singleton" >
<property name="dataSource" ref="articleDataSource"></property>
</bean>
<!-- Search service bean, wired with the articleDAO bean above. The Struts action
     looks it up by the id "articleService". -->
<bean id="articleService" class="com.jushi.lucene.business.ArticleServiceImpl" scope="singleton" >
<property name="articleDAO" ref="articleDAO"></property>
</bean>
</beans>
--- applicationContext-lucene-datasource.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<beans>
<!-- Commons DBCP connection pool for the MySQL database "companyweb" on localhost.
     NOTE(review): the credentials are hard-coded for the demo; move them to an
     external properties file before using this anywhere else. -->
<bean id="articleDataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close">
<property name="driverClassName" value="com.mysql.jdbc.Driver" ></property>
<!-- The ampersand between URL parameters must be written as an entity reference,
     otherwise this file is not well-formed XML and Spring cannot parse it. -->
<property name="url" value="jdbc:mysql://localhost/companyweb?useUnicode=true&amp;characterEncoding=utf-8" />
<property name="username" value="root" />
<property name="password" value="jushi" />
<!-- Pool sizing -->
<property name="initialSize" value="5" />
<property name="maxActive" value="20" />
<property name="maxIdle" value="20" />
<property name="minIdle" value="5" />
</bean>
</beans>
8. action配置 struts-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts-config PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 1.2//EN" "http://struts.apache.org/dtds/struts-config_1_2.dtd">
<struts-config>
<form-beans>
<!-- Single form bean shared by the demo; no dedicated ActionForm class is written. -->
<form-bean name="lazyForm" type="org.apache.struts.validator.LazyValidatorForm"/>
</form-beans>
<action-mappings>
<!-- /index is handled by IndexAction, a DispatchAction: the request parameter "m"
     names the method to invoke (index.jsp submits m=index). Validation is off. -->
<action path="/index"
type="com.jushi.lucene.businessview.IndexAction"
name="lazyForm" parameter="m" scope="request" validate="false">
<!-- Search results are rendered by result.jsp. -->
<forward name="result" path="/result.jsp"></forward>
</action>
</action-mappings>
</struts-config>
9. web.xml 文件配置: 监听、加载配置文件
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5"
xmlns="http://java.sun.com/xml/ns/javaee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee
http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">
<!-- Spring configuration: every applicationContext*.xml under springConfig on the classpath. -->
<context-param>
<param-name>contextConfigLocation</param-name>
<param-value>classpath*:/springConfig/applicationContext*.xml</param-value>
</context-param>
<!-- Character encoding filter: applies UTF-8 to all requests (mapped to /*). -->
<filter>
<filter-name>encodingFilter</filter-name>
<filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
<init-param>
<param-name>encoding</param-name>
<param-value>UTF-8</param-value>
</init-param>
</filter>
<filter-mapping>
<filter-name>encodingFilter</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>
<!-- Creates the Spring application context from contextConfigLocation at startup. -->
<listener>
<listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
</listener>
<!-- Struts 1 front controller, configured from /WEB-INF/struts-config.xml. -->
<servlet>
<servlet-name>action</servlet-name>
<servlet-class>org.apache.struts.action.ActionServlet</servlet-class>
<init-param>
<param-name>config</param-name>
<param-value>/WEB-INF/struts-config.xml</param-value>
</init-param>
</servlet>
<!-- Requests ending in .html are routed to Struts, so /index.html maps to the action path /index. -->
<servlet-mapping>
<servlet-name>action</servlet-name>
<url-pattern>*.html</url-pattern>
</servlet-mapping>
<welcome-file-list>
<welcome-file>index.jsp</welcome-file>
</welcome-file-list>
</web-app>
10. 前端 View 两个jsp index.jsp和 result.jsp
index.jsp
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>简单的lucene测试</title>
<script type="text/javascript" src="js/CheckFunction.js"></script>
<script type="text/javascript">
// Validates the keyword box and, when it is not blank, posts form1 to the
// Struts action with m=index. Returns false when the keyword is blank.
// Trim() is expected to come from js/CheckFunction.js.
function _search()
{
var searchForm = document.form1;
var keyword = Trim(searchForm.title.value);
if(keyword != '')
{
searchForm.action = 'index.html?m=index';
searchForm.submit();
return;
}
alert("输入一个关键字吧!");
return false;
}
</script>
</head>
<body>
<center><h1>测试Lucene</h1>
<hr/>
<%-- The default action carries m=index so that a submit triggered by the Enter key
     still tells the DispatchAction which method to run. --%>
<form name="form1" action="index.html?m=index" method="post">
请输入文章的标题关键字:<input type="text" maxlength="20" name="title" size="40"/>
<%-- The attribute name must be plain ASCII "onclick"; a look-alike first letter
     leaves the button without a handler. --%>
<button onclick="javascript:_search();return false;" style="border: thin;border-color: blue" name="search"> GO </button>
</form>
</center>
</body>
</html>
result.jsp
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%@ page import="com.jushi.lucene.entities.*"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<%
// Read the search results and timing placed in the request by the action.
List<Article> list = (List<Article>) request.getAttribute("articlelist");
// The attribute is absent when indexing or the search failed; fall back to an
// empty list so the rest of the page can call list.size() safely.
if(list == null)
list = new ArrayList<Article>();
String TotalTime = "0.000";
int TotalHits = 0;
if(request.getAttribute("totalTime")!=null)
TotalTime = request.getAttribute("totalTime").toString();
// Every result carries the same total hit count, so the first one is enough.
if(!list.isEmpty())
TotalHits = list.get(0).getTotalHits();
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>搜索的结果如下:</title>
<style type="text/css">
.Fee {
width: 650px;
height:28px;
float: right;
padding:1px;
overflow:hidden;
border-bottom:1px solid #bde2ff;
line-height:28px;
font-family: '宋体';
font-size: 15px;
font-weight: bold;
}
.Fee span {
color: red;
font-size:14px;
}
</style>
</head>
<body>
<div class="Fee">共搜到 <span><%=TotalHits %></span> 条结果. 共花费:<span><%=TotalTime %> </span>秒.</div><br/><br/>
<table width="80%" height="70%" border="1" cellpadding="0" cellspacing="0" align="center">
<tr height="20" bgcolor="#CCCCCC">
<td align="center" width="20%">标题</td>
<td align="center" width="60%">文章内容</td>
<td align="center" width="20%">所属标签</td>
</tr>
<%
// Render one table row per article, or a single "no results" row when the list is empty.
// NOTE(review): the values are written without HTML escaping. Title and content are
// expected to carry the highlight markup added by the search service; the tag comes
// straight from the index - confirm the stored data is trusted.
if(list.size()>0){
for(Article art : list){
String title = art.getTitle();
String content = art.getContent();
String tag = art.getTag();
%>
<tr>
<td><%=title %></td>
<td><%=content %></td>
<td><%=tag %></td>
</tr>
<%}}else{ %>
<tr>
<td colspan="3" align="center"><h3>对不起...没有搜到相关信息..请尝试其他关键字!</h3></td>
</tr>
<%} %>
</table>
<br/>
<center><a href="<%=basePath %>./">返回上一级</a></center>
</body>
</html>
11. action:IndexAction.java
package com.jushi.lucene.businessview;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import org.apache.struts.actions.DispatchAction;
import org.springframework.web.context.WebApplicationContext;
import org.springframework.web.context.support.WebApplicationContextUtils;
import com.jushi.lucene.business.IArticleService;
import com.jushi.lucene.entities.Article;
/**
 * Struts dispatch action behind {@code /index}: makes sure the Lucene index
 * exists, runs the keyword search and forwards to the result page.
 *
 * @Author fenglong
 * @CreateDate Jun 4, 2010 1:50:02 PM
 * @Version V 1.0
 * @CopyRight 2010 jushi
 */
public class IndexAction extends DispatchAction {

    /**
     * Handles {@code m=index}. Reads the keyword from the {@code title} request
     * parameter and exposes the results as the {@code articlelist} request
     * attribute, plus the elapsed seconds as {@code totalTime} when a search ran.
     *
     * @return the {@code result} forward
     */
    public ActionForward index(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response)
            throws Exception {
        WebApplicationContext wac = WebApplicationContextUtils
                .getRequiredWebApplicationContext(this.getServlet().getServletContext());
        IArticleService articleService = (IArticleService) wac.getBean("articleService");
        String q = request.getParameter("title");
        DecimalFormat df = new DecimalFormat("#0.000");

        List<Article> list = null;
        if (articleService.createIndex()) {
            long begin = System.currentTimeMillis();
            list = articleService.getArticles(q);
            long end = System.currentTimeMillis();
            double time = (double) (end - begin) / 1000;
            request.setAttribute("totalTime", df.format(time));
        }

        // The view dereferences the list unconditionally, so always hand it a
        // non-null list, including when indexing or the search failed.
        if (list == null) {
            list = new ArrayList<Article>();
        }
        request.setAttribute("articlelist", list);
        return mapping.findForward("result");
    }
}
好了,所有的配置已经完成了。大概就是个 Lucene 的简单实现,只是稍微和 Spring 结合了一下。功能就是搜索数据、高亮显示关键字。页面不好看,逻辑较简单。
欢迎拍砖。