Lucene是一个高性能、可伸缩的信息检索(IR)库。它可以为你的应用程序添加索引和搜索能力。Lucene是用Java实现的、成熟的开源项目,是著名的Apache Jakarta大家庭的一员,并且基于Apache软件许可 [ASF, License]。同样,Lucene是目前非常流行的、免费的Java信息检索(IR)库。
下面是一个针对数据库某张表的若干字段使用Lucene建立索引的demo。基本思想是先用传统的JDBC程序把数据读出来保存在容器中,然后为容器中的每条数据建立一个Document文档。索引建立好之后就可以对其进行搜索。本例使用Lucene 3.6.0和IKAnalyzer 3.2.8分词器。
IndexCreateUtill类用来建立索引:
package com.ping.indexManager;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.ping.entity.NewsItem;
import com.ping.entity.NewsType;
import com.ping.jdbc.DBSource;
/**
 * Builds a Lucene index from the rows of the {@code t_newsitem} table.
 *
 * <p>Rows are read through {@link DBSource}, converted to {@link NewsItem}
 * objects, and then each item is written to the index as one Lucene
 * {@link Document} analyzed with {@link IKAnalyzer}.
 */
public class IndexCreateUtill {

    /** Rows loaded by the most recent call to {@link #createIndexForMynews()}. */
    private List<NewsItem> list;

    /**
     * Reads every row of {@code t_newsitem} and adds one document per row to
     * the index stored under {@code D:/lucene_news/mynews}.
     *
     * <p>The index writer, the directory and all JDBC resources are released
     * even when reading or indexing fails.
     *
     * @throws IOException if the index cannot be opened or written, or if
     *         reading the rows fails (the {@link SQLException} is the cause)
     * @throws ClassNotFoundException if {@link DBSource#getInstance()} cannot
     *         load the configured JDBC driver
     */
    public void createIndexForMynews() throws IOException, ClassNotFoundException {
        // Folder that holds the index files.
        File indexDir = new File("D:/lucene_news/mynews");
        Directory directory = FSDirectory.open(indexDir);
        try {
            // IKAnalyzer is the tokenizer used for every analyzed field.
            Analyzer analyzer = new IKAnalyzer();
            // NOTE(review): no open mode is configured, so Lucene's default
            // applies; if that default appends to an existing index, running
            // this method twice will index every row twice - confirm.
            IndexWriterConfig indexWriterConfig =
                    new IndexWriterConfig(Version.LUCENE_31, analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
            try {
                list = loadNewsItems();
                DateFormat dateFormat =
                        new SimpleDateFormat("yyyy年MM月dd日 HH时mm分ss秒");
                for (NewsItem item : list) {
                    indexWriter.addDocument(toDocument(item, dateFormat));
                }
            } finally {
                // Always close the writer so the index write lock is released,
                // even when loading or indexing throws.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Loads every row of {@code t_newsitem} into a list of {@link NewsItem}.
     *
     * @return the loaded items, in result-set order; never {@code null}
     * @throws IOException if any JDBC call fails
     * @throws ClassNotFoundException if the JDBC driver cannot be loaded
     */
    private List<NewsItem> loadNewsItems() throws IOException, ClassNotFoundException {
        Connection conn = null;
        Statement stmt = null;
        ResultSet rs = null;
        try {
            conn = DBSource.getInstance().getConnection();
            stmt = conn.createStatement();
            rs = stmt.executeQuery("select * from t_newsitem");
            List<NewsItem> items = new ArrayList<NewsItem>();
            while (rs.next()) {
                NewsItem newsItem = new NewsItem();
                newsItem.setId(rs.getInt("id"));
                newsItem.setNewsTitle(rs.getString("newsTitle"));
                newsItem.setNewsContent(rs.getString("newsContent"));
                newsItem.setPublishTime(rs.getTimestamp("publishTime"));
                newsItem.setResource(rs.getString("resource"));
                newsItem.setT_newsType_id(rs.getInt("t_newsType_id"));
                newsItem.setEditor(rs.getString("editor"));
                items.add(newsItem);
            }
            return items;
        } catch (SQLException e) {
            // Surface the failure instead of printing it and reporting success.
            throw new IOException("Failed to read news rows from t_newsitem", e);
        } finally {
            // The static closers tolerate null and never throw.
            DBSource.closeResultSet(rs);
            DBSource.closeStatement(stmt);
            DBSource.closeConnection(conn);
        }
    }

    /**
     * Converts one news item into a Lucene document with the fields
     * {@code title}, {@code content}, {@code date} and {@code id}.
     *
     * <p>A {@code null} title, content or publish time is indexed as an empty
     * string, because {@link Field} rejects {@code null} values.
     *
     * @param item       the news item to convert
     * @param dateFormat formatter used for the publish time
     * @return the populated document
     */
    private static Document toDocument(NewsItem item, DateFormat dateFormat) {
        String newsTitle = item.getNewsTitle() == null ? "" : item.getNewsTitle();
        String newsContent = item.getNewsContent() == null ? "" : item.getNewsContent();
        String publishDate = item.getPublishTime() == null
                ? ""
                : dateFormat.format(item.getPublishTime());
        String id = String.valueOf(item.getId());

        Document doc = new Document();
        // Title is searched and highlighted: analyzed, with positions and offsets.
        doc.add(new Field("title", newsTitle, Field.Store.YES,
                Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        // Content is handled the same way as the title.
        doc.add(new Field("content", newsContent, Field.Store.YES,
                Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        // Publish time is not highlighted, so no term vector is stored.
        // NOTE(review): the original comment says this field is used for
        // descending sort; Lucene sorting normally expects an untokenized
        // field - confirm ANALYZED is intended before changing it.
        doc.add(new Field("date", publishDate, Field.Store.YES,
                Field.Index.ANALYZED, Field.TermVector.NO));
        // Primary key is stored only: not indexed, no term vector.
        doc.add(new Field("id", id, Field.Store.YES,
                Field.Index.NO, Field.TermVector.NO));
        return doc;
    }

    /**
     * Command-line entry point: builds the news index once.
     *
     * @param args ignored
     * @throws Exception if index creation fails
     */
    public static void main(String[] args) throws Exception {
        IndexCreateUtill util = new IndexCreateUtill();
        util.createIndexForMynews();
    }
}
DBSource类是JDBC链接数据库的工具类:
package com.ping.jdbc;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;
/**
 * JDBC helper that loads connection settings from {@code jdbc.properties} on
 * the class path, registers the configured driver, and opens connections.
 *
 * <p>A shared instance is available through {@link #getInstance()}. The
 * static {@code close*} helpers accept {@code null} and never throw.
 */
public class DBSource {

    private final Properties properties;
    private final String driver;
    private final String url;
    private final String username;
    private final String password;

    // volatile is required for the double-checked locking in getInstance():
    // without it another thread may observe a partially constructed instance.
    private static volatile DBSource instance = null;

    /**
     * Loads {@code jdbc.properties} and the JDBC driver class it names.
     *
     * @throws IOException if {@code jdbc.properties} is not on the class path
     *         or cannot be read
     * @throws ClassNotFoundException if {@code jdbc.driverClassName} is not
     *         set or names a class that cannot be loaded
     */
    public DBSource() throws IOException, ClassNotFoundException {
        properties = new Properties();
        java.io.InputStream in =
                DBSource.class.getClassLoader().getResourceAsStream("jdbc.properties");
        if (in == null) {
            // Report the real problem instead of an NPE from Properties.load(null).
            throw new IOException("jdbc.properties not found on the class path");
        }
        try {
            properties.load(in);
        } finally {
            in.close();
        }
        driver = properties.getProperty("jdbc.driverClassName");
        url = properties.getProperty("jdbc.url");
        username = properties.getProperty("jdbc.username");
        password = properties.getProperty("jdbc.password");
        if (driver == null) {
            throw new ClassNotFoundException(
                    "jdbc.driverClassName is not set in jdbc.properties");
        }
        Class.forName(driver);
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the shared {@code DBSource}
     * @throws IOException if the configuration cannot be loaded
     * @throws ClassNotFoundException if the JDBC driver cannot be loaded
     */
    public static DBSource getInstance() throws IOException, ClassNotFoundException {
        if (instance == null) {
            synchronized (DBSource.class) {
                if (instance == null) {
                    instance = new DBSource();
                }
            }
        }
        return instance;
    }

    /**
     * Opens a new connection using the configured URL, user name and password.
     *
     * @return a new connection; the caller is responsible for closing it
     * @throws SQLException if the connection cannot be established
     */
    public Connection getConnection() throws SQLException {
        return DriverManager.getConnection(url, username, password);
    }

    /**
     * Closes the result set, then the statement, then the connection.
     * Any argument may be {@code null}.
     *
     * @param rs   result set to close, or {@code null}
     * @param ps   statement to close, or {@code null}
     * @param conn connection to close, or {@code null}
     * @throws SQLException kept in the signature for existing callers; the
     *         individual closers handle their own failures
     */
    public void closeAll(ResultSet rs, Statement ps, Connection conn) throws SQLException {
        closeResultSet(rs);
        closeStatement(ps);
        closeConnection(conn);
    }

    /**
     * Closes the connection if it is not {@code null}; a failure is printed
     * to standard error and not rethrown.
     *
     * @param con connection to close, or {@code null}
     */
    public static void closeConnection(Connection con) {
        try {
            if (con != null) {
                con.close();
            }
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Closes the statement if it is not {@code null}; a failure is printed
     * to standard error and not rethrown.
     *
     * @param st statement to close, or {@code null}
     */
    public static void closeStatement(Statement st) {
        try {
            if (st != null) {
                st.close();
            }
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Closes the result set if it is not {@code null}; a failure is printed
     * to standard error and not rethrown.
     *
     * @param rs result set to close, or {@code null}
     */
    public static void closeResultSet(ResultSet rs) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }
}
NewsItem类是持久化类:
package com.ping.entity;
import java.io.Serializable;
import java.util.Date;
/**
 * Serializable value holder for one news record: id, title, content,
 * publish time, resource, news-type id and editor.
 */
public class NewsItem implements Serializable {

    private static final long serialVersionUID = 1L;

    private Integer id;
    private String newsTitle;
    private String newsContent;
    private Date publishTime;
    private String resource;
    private Integer t_newsType_id;
    private String editor;

    /** Creates an item whose fields are all {@code null}. */
    public NewsItem() {
    }

    /**
     * Creates an item with every field set.
     *
     * @param id            value for the id field
     * @param newsTitle     value for the title field
     * @param newsContent   value for the content field
     * @param publishTime   value for the publish-time field (stored as given, not copied)
     * @param resource      value for the resource field
     * @param t_newsType_id value for the news-type id field
     * @param editor        value for the editor field
     */
    public NewsItem(Integer id, String newsTitle, String newsContent,
            Date publishTime, String resource, Integer t_newsType_id, String editor) {
        this.id = id;
        this.newsTitle = newsTitle;
        this.newsContent = newsContent;
        this.publishTime = publishTime;
        this.resource = resource;
        this.t_newsType_id = t_newsType_id;
        this.editor = editor;
    }

    /** @return the id, or {@code null} if unset */
    public Integer getId() {
        return this.id;
    }

    /** @param id the new id */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the title, or {@code null} if unset */
    public String getNewsTitle() {
        return this.newsTitle;
    }

    /** @param newsTitle the new title */
    public void setNewsTitle(String newsTitle) {
        this.newsTitle = newsTitle;
    }

    /** @return the content, or {@code null} if unset */
    public String getNewsContent() {
        return this.newsContent;
    }

    /** @param newsContent the new content */
    public void setNewsContent(String newsContent) {
        this.newsContent = newsContent;
    }

    /** @return the stored publish time (same object that was set), or {@code null} */
    public Date getPublishTime() {
        return this.publishTime;
    }

    /** @param publishTime the new publish time (stored as given, not copied) */
    public void setPublishTime(Date publishTime) {
        this.publishTime = publishTime;
    }

    /** @return the resource, or {@code null} if unset */
    public String getResource() {
        return this.resource;
    }

    /** @param resource the new resource */
    public void setResource(String resource) {
        this.resource = resource;
    }

    /** @return the news-type id, or {@code null} if unset */
    public Integer getT_newsType_id() {
        return this.t_newsType_id;
    }

    /** @param t_newsType_id the new news-type id */
    public void setT_newsType_id(Integer t_newsType_id) {
        this.t_newsType_id = t_newsType_id;
    }

    /** @return the editor, or {@code null} if unset */
    public String getEditor() {
        return this.editor;
    }

    /** @param editor the new editor */
    public void setEditor(String editor) {
        this.editor = editor;
    }
}