Lucene索引数据库

Lucene是一个高性能、可伸缩的信息检索(IR)库。它可以为你的应用程序添加索引和搜索能力。Lucene是用Java实现的、成熟的开源项目,是著名的Apache Jakarta大家庭的一员,并且基于Apache软件许可 [ASF, License]。同样,Lucene是目前非常流行的、免费的Java信息检索(IR)库。

这是一个针对数据库某张表的部分字段使用Lucene建立索引的demo。基本思想是先用传统的JDBC程序把数据读出来保存在容器中,然后为容器中的每条数据建立一个Document文档。索引建立好之后就可以对其进行搜索。本例子使用Lucene 3.6.0和IKAnalyzer 3.2.8分词器。

IndexCreateUtill类用来建立索引:

package com.ping.indexManager;
 
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
 
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
 
import com.ping.entity.NewsItem;
import com.ping.entity.NewsType;
import com.ping.jdbc.DBSource;
 
/**
 * Builds a Lucene index for the rows of the {@code t_newsitem} table.
 *
 * <p>Rows are read through {@link DBSource}, converted to {@link NewsItem}
 * objects and then written as Lucene documents into the index directory
 * {@code D:/lucene_news/mynews} using the IK analyzer.
 */
public class IndexCreateUtill {

    /** News rows loaded by the most recent {@link #createIndexForMynews()} call. */
    private List<NewsItem> list;

    /**
     * Reads every row of {@code t_newsitem} and adds one Lucene document per row
     * to the index stored in {@code D:/lucene_news/mynews}.
     *
     * <p>NOTE(review): the writer config keeps its default open mode, so running
     * this against an existing index presumably appends the rows again; confirm
     * whether a rebuild should use {@code IndexWriterConfig.OpenMode.CREATE}.
     *
     * @throws IOException if the index cannot be written, if the JDBC
     *         configuration cannot be loaded, or if reading the table fails
     *         (the {@link SQLException} is attached as the cause)
     * @throws ClassNotFoundException if the configured JDBC driver class is missing
     */
    public void createIndexForMynews() throws IOException, ClassNotFoundException {
        // Load the rows before touching the index so that a database failure
        // never leaves an open writer (and its write lock) behind.
        try {
            list = loadNewsItems();
        } catch (SQLException e) {
            throw new IOException("Failed to read news items from t_newsitem", e);
        }

        // Directory that holds the index files.
        Directory directory = FSDirectory.open(new File("D:/lucene_news/mynews"));
        try {
            // The IK analyzer tokenizes the title and content fields.
            Analyzer analyzer = new IKAnalyzer();
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_31, analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
            try {
                DateFormat dateFormat = new SimpleDateFormat("yyyy年MM月dd日 HH时mm分ss秒");
                for (NewsItem newsItem : list) {
                    indexWriter.addDocument(toDocument(newsItem, dateFormat));
                }
            } finally {
                // Always close the writer so the index write lock is released.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Loads all rows of {@code t_newsitem} into {@link NewsItem} objects.
     * The JDBC resources are released whether or not the query succeeds.
     */
    private List<NewsItem> loadNewsItems() throws SQLException, IOException, ClassNotFoundException {
        Connection conn = null;
        Statement stmt = null;
        ResultSet rs = null;
        try {
            conn = DBSource.getInstance().getConnection();
            stmt = conn.createStatement();
            rs = stmt.executeQuery("select * from t_newsitem");

            List<NewsItem> items = new ArrayList<NewsItem>();
            while (rs.next()) {
                NewsItem newsItem = new NewsItem();
                newsItem.setId(rs.getInt("id"));
                newsItem.setNewsTitle(rs.getString("newsTitle"));
                newsItem.setNewsContent(rs.getString("newsContent"));
                newsItem.setPublishTime(rs.getTimestamp("publishTime"));
                newsItem.setResource(rs.getString("resource"));
                newsItem.setT_newsType_id(rs.getInt("t_newsType_id"));
                newsItem.setEditor(rs.getString("editor"));
                items.add(newsItem);
            }
            return items;
        } finally {
            // The static close helpers tolerate null and do not throw.
            DBSource.closeResultSet(rs);
            DBSource.closeStatement(stmt);
            DBSource.closeConnection(conn);
        }
    }

    /**
     * Converts one news row into a Lucene document with the fields
     * {@code title}, {@code content}, {@code date} and {@code id}.
     */
    private Document toDocument(NewsItem newsItem, DateFormat dateFormat) {
        Document doc = new Document();

        // Title and content are searched and highlighted: tokenized, with
        // term vectors that carry positions and offsets.
        doc.add(new Field("title", nullToEmpty(newsItem.getNewsTitle()),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        doc.add(new Field("content", nullToEmpty(newsItem.getNewsContent()),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

        // The date is used for sorting, which needs one untokenized term per
        // document. Every part of the pattern is zero-padded, so string order
        // equals chronological order. Rows without a publish time get no date field.
        if (newsItem.getPublishTime() != null) {
            doc.add(new Field("date", dateFormat.format(newsItem.getPublishTime()),
                    Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
        }

        // The primary key is stored and indexed as a single term so a document
        // can be found again by its id; it is neither tokenized nor highlighted.
        doc.add(new Field("id", String.valueOf(newsItem.getId()),
                Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
        return doc;
    }

    /** Replaces a null column value with an empty string, because a Field value must not be null. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Command-line entry point: builds the news index once. */
    public static void main(String[] args) throws Exception {
        IndexCreateUtill util = new IndexCreateUtill();
        util.createIndexForMynews();
    }
}

DBSource类是通过JDBC连接数据库的工具类:

package com.ping.jdbc;
 
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;
 
/**
 * JDBC helper that reads its connection settings from the classpath resource
 * {@code jdbc.properties} and hands out connections via {@link DriverManager}.
 *
 * <p>The keys read are {@code jdbc.driverClassName}, {@code jdbc.url},
 * {@code jdbc.username} and {@code jdbc.password}.
 */
public class DBSource {

    private final Properties properties;
    private final String driver;
    private final String url;
    private final String username;
    private final String password;

    // volatile is required for the double-checked locking in getInstance():
    // without it another thread may observe a partially constructed instance.
    private static volatile DBSource instance = null;

    /**
     * Loads {@code jdbc.properties} from the classpath and loads the driver class.
     *
     * @throws IOException if the resource is missing or unreadable, or if
     *         {@code jdbc.driverClassName} is not defined in it
     * @throws ClassNotFoundException if the configured driver class cannot be loaded
     */
    public DBSource() throws IOException, ClassNotFoundException {
        properties = new Properties();
        InputStream in = DBSource.class.getClassLoader().getResourceAsStream("jdbc.properties");
        if (in == null) {
            // getResourceAsStream returns null rather than throwing when the resource is absent.
            throw new IOException("jdbc.properties not found on the classpath");
        }
        try {
            properties.load(in);
        } finally {
            in.close();
        }
        driver = properties.getProperty("jdbc.driverClassName");
        url = properties.getProperty("jdbc.url");
        username = properties.getProperty("jdbc.username");
        password = properties.getProperty("jdbc.password");
        if (driver == null) {
            throw new IOException("jdbc.driverClassName is missing from jdbc.properties");
        }
        Class.forName(driver);
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @throws IOException if the configuration cannot be loaded
     * @throws ClassNotFoundException if the configured driver class cannot be loaded
     */
    public static DBSource getInstance() throws IOException, ClassNotFoundException {
        DBSource current = instance;
        if (current == null) {
            synchronized (DBSource.class) {
                current = instance;
                if (current == null) {
                    current = new DBSource();
                    instance = current;
                }
            }
        }
        return current;
    }

    /**
     * Opens a new connection with the configured URL, user name and password.
     *
     * @throws SQLException if {@link DriverManager} cannot open the connection
     */
    public Connection getConnection() throws SQLException {
        return DriverManager.getConnection(url, username, password);
    }

    /**
     * Closes the result set, then the statement, then the connection.
     * Each argument may be {@code null}.
     *
     * @throws SQLException declared for compatibility with existing callers;
     *         the close helpers used here report failures themselves
     */
    public void closeAll(ResultSet rs, Statement ps, Connection conn) throws SQLException {
        closeResultSet(rs);
        closeStatement(ps);
        closeConnection(conn);
    }

    /** Closes the connection if it is not {@code null}; a failure is printed, not rethrown. */
    public static void closeConnection(Connection con) {
        try {
            if (con != null) {
                con.close();
            }
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }

    /** Closes the statement if it is not {@code null}; a failure is printed, not rethrown. */
    public static void closeStatement(Statement st) {
        try {
            if (st != null) {
                st.close();
            }
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }

    /** Closes the result set if it is not {@code null}; a failure is printed, not rethrown. */
    public static void closeResultSet(ResultSet rs) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }
}

NewsItem类是持久化类:

package com.ping.entity;
 
import java.io.Serializable;
import java.util.Date;
 
/**
 * Persistent entity holding one news record: its primary key, title, content,
 * publish time, source, news-type key and editor.
 */
public class NewsItem implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Primary key of the news record. */
    private Integer id;
    /** Headline of the news record. */
    private String newsTitle;
    /** Body text of the news record. */
    private String newsContent;
    /** Time at which the news record was published. */
    private Date publishTime;
    /** Source the news record came from. */
    private String resource;
    /** Key of the news type this record belongs to. */
    private Integer t_newsType_id;
    /** Editor of the news record. */
    private String editor;

    /** Creates an item with every property unset ({@code null}). */
    public NewsItem() {
    }

    /** Creates an item with every property set to the given value. */
    public NewsItem(Integer id, String newsTitle, String newsContent,
            Date publishTime, String resource, Integer t_newsType_id, String editor) {
        this.id = id;
        this.newsTitle = newsTitle;
        this.newsContent = newsContent;
        this.publishTime = publishTime;
        this.resource = resource;
        this.t_newsType_id = t_newsType_id;
        this.editor = editor;
    }

    /** @return the primary key */
    public Integer getId() {
        return this.id;
    }

    /** @param id the primary key */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the headline */
    public String getNewsTitle() {
        return this.newsTitle;
    }

    /** @param newsTitle the headline */
    public void setNewsTitle(String newsTitle) {
        this.newsTitle = newsTitle;
    }

    /** @return the body text */
    public String getNewsContent() {
        return this.newsContent;
    }

    /** @param newsContent the body text */
    public void setNewsContent(String newsContent) {
        this.newsContent = newsContent;
    }

    /** @return the publish time (the same object that was set, not a copy) */
    public Date getPublishTime() {
        return this.publishTime;
    }

    /** @param publishTime the publish time (stored as given, not copied) */
    public void setPublishTime(Date publishTime) {
        this.publishTime = publishTime;
    }

    /** @return the source */
    public String getResource() {
        return this.resource;
    }

    /** @param resource the source */
    public void setResource(String resource) {
        this.resource = resource;
    }

    /** @return the news-type key */
    public Integer getT_newsType_id() {
        return this.t_newsType_id;
    }

    /** @param t_newsType_id the news-type key */
    public void setT_newsType_id(Integer t_newsType_id) {
        this.t_newsType_id = t_newsType_id;
    }

    /** @return the editor */
    public String getEditor() {
        return this.editor;
    }

    /** @param editor the editor */
    public void setEditor(String editor) {
        this.editor = editor;
    }
}

转载于:https://my.oschina.net/coolbash/blog/79464

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值