Searching database and txt file content with Lucene 3.6

Searching database content with SQL LIKE '%...%' queries is a familiar pain: once the data volume grows or several tables have to be joined, the speed becomes unbearable. Lucene 3.6 solves this nicely.
 
Lucene 3.6 is used here to search the title, user_name, tag_name and descr columns of a photo table.
A worked example makes this clearer, and the example also handles Chinese text.
(You need the MySQL 5 JDBC driver jar and the Lucene 3.6 jars.)
 
1. The database is MySQL 5; create a photo table in a database named test.
--
-- Table structure for table `photo`
--
CREATE TABLE IF NOT EXISTS `photo` (
  `photo_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `title` varchar(11) DEFAULT NULL,
  `descr` text,
  `user_name` varchar(11) DEFAULT NULL,
  `tag_name` varchar(11) DEFAULT NULL,
  PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT AUTO_INCREMENT=5;
--
-- Data for table `photo`
--
INSERT INTO `photo` (`photo_id`, `title`, `descr`, `user_name`, `tag_name`) VALUES
(1, 'z美女j', 'h美女h', 't好人5', 'g美女femal'),
(2, 'l美女k', '5美女', '6美女', 'd美女female'),
(3, 'hagh', '4说的就是我的是f', '', NULL),
(4, 'hagh', '2说的就是我的是g', ' ', NULL);
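
For comparison, the LIKE-based approach that this article replaces has to wrap every keyword in wildcards and check all four columns on every request; a leading wildcard cannot use a normal index, so MySQL ends up scanning the whole table. A minimal JDBC sketch of that approach (the class name LikeSearchExample and the keyword are just for illustration):

package test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

// Illustrative only: the slow LIKE '%...%' search that Lucene replaces below.
public class LikeSearchExample {
 public static void main(String[] args) throws Exception {
  Class.forName("com.mysql.jdbc.Driver");
  Connection con = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "root");
  String keyword = "美女"; // example keyword
  String sql = "select photo_id from photo"
    + " where title like ? or descr like ? or user_name like ? or tag_name like ?";
  PreparedStatement pstm = con.prepareStatement(sql);
  for (int i = 1; i <= 4; i++) {
   pstm.setString(i, "%" + keyword + "%"); // leading wildcard forces a full table scan
  }
  ResultSet rs = pstm.executeQuery();
  while (rs.next()) {
   System.out.println("photo_id === " + rs.getLong(1));
  }
  rs.close();
  pstm.close();
  con.close();
 }
}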
 
2. There are four Java files:
 
Photo.java maps the photo table and loads its rows; its content is as follows:
package test;
import java.sql.Connection;
import java.util.ArrayList;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
public class Photo {
 private long photoId;
 private String title;
 private String description;
 private String userName;
 private String tag;
 public String getDescription() {
  return description;
 }
 public void setDescription(String description) {
  this.description = description;
 }
 public long getPhotoId() {
  return photoId;
 }
 public void setPhotoId(long photoId) {
  this.photoId = photoId;
 }
 public String getTag() {
  return tag;
 }
 public void setTag(String tag) {
  this.tag = tag;
 }
 public String getTitle() {
  return title;
 }
 public void setTitle(String title) {
  this.title = title;
 }
 public String getUserName() {
  return userName;
 }
 public void setUserName(String userName) {
  this.userName = userName;
 }
 /** Loads every row of the photo table into an array of Photo beans. */
 public static Photo[] loadPhotos(Connection con) throws Exception {
  ArrayList<Photo> list = new ArrayList<Photo>();
  PreparedStatement pstm = null;
  ResultSet rs = null;
  String sql = "select photo_id,title,descr,user_name,tag_name from photo";
  try {
   pstm = con.prepareStatement(sql);
   rs = pstm.executeQuery();
   while (rs.next()) {
    Photo photo = new Photo();
    photo.setPhotoId(rs.getLong(1));
    photo.setTitle(rs.getString(2));
    photo.setDescription(rs.getString(3));
    photo.setUserName(rs.getString(4));
    photo.setTag(rs.getString(5));
    list.add(photo);
   }
  } catch (SQLException e) {
   e.printStackTrace();
  } finally {
   if (rs != null) {
    rs.close();
   }
   if (pstm != null) {
    pstm.close();
   }
  }
  return (Photo[]) list.toArray(new Photo[list.size()]);
 }
}

IndexerFile.java writes the database rows into a Lucene index on disk; its content is as follows:
package test;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.document.Field;
public class IndexerFile {
 public static int indexFile(String indexDir,Photo[] list) throws IOException{
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
     conf.setOpenMode(OpenMode.CREATE);
     IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDir)), conf);
 
  for(int i=0;i<list.length;i++){
   Document doc=new Document();
   doc.add(new Field("photoId", String.valueOf(list[i].getPhotoId()), Field.Store.YES, Field.Index.NO));
   if(list[i].getTitle()!=null && list[i].getTitle().length()>0)
    doc.add(new Field("title", list[i].getTitle(), Field.Store.YES, Field.Index.ANALYZED));
   if(list[i].getDescription()!=null && list[i].getDescription().length()>0)
    doc.add(new Field("description", list[i].getDescription(), Field.Store.YES, Field.Index.ANALYZED));
   if(list[i].getUserName()!= null && list[i].getUserName().length()>0)
   doc.add(new Field("userName", list[i].getUserName(), Field.Store.YES, Field.Index.ANALYZED));
   if(list[i].getTag()!= null && list[i].getTag().length()>0)
    doc.add(new Field("tag", list[i].getTag(), Field.Store.YES, Field.Index.ANALYZED));
   writer.addDocument(doc);
  }
 
  int numIndexed = writer.maxDoc(); // number of documents written to the index
  writer.forceMerge(1); // merge down to a single segment (replacement for the old optimize())
  writer.close();
  return numIndexed;
 }
}
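
It helps to see what StandardAnalyzer actually does with Chinese text: with Version.LUCENE_36 it emits one token per CJK character rather than real words, which is why short substrings such as "美女" can still be found. A small standalone sketch (the class name ShowTokens and the sample string are just for illustration):

package test;

import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

// Prints the tokens StandardAnalyzer produces; CJK text comes out one character per token.
public class ShowTokens {
 public static void main(String[] args) throws Exception {
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
  TokenStream ts = analyzer.tokenStream("title", new StringReader("z美女j 好人5"));
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
   System.out.println(term.toString());
  }
  ts.end();
  ts.close();
 }
}

If real Chinese word segmentation is wanted, an analyzer such as SmartChineseAnalyzer from the Lucene 3.6 smartcn contrib module could be swapped in for both indexing and searching.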
 
SearcherFile.java searches the on-disk index; its content is as follows:
package test;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
public class SearcherFile {
 public static void search(IndexSearcher searcher, String[] q) throws IOException, ParseException {
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
  String[] fields = {"title","description","tag","userName"};   
        // q and fields are parallel arrays: q[i] is parsed against fields[i]
        Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, q, fields, analyzer);
        TopDocs topDocs = searcher.search(query, 100); // 100 = maximum number of hits to return
        ScoreDoc[] hits = topDocs.scoreDocs;
        System.out.println("Index has " + searcher.maxDoc() + " documents; " + hits.length + " hits");
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document document = searcher.doc(docId);
            System.out.println("photoId === " + document.get("photoId"));
        }
 }
}
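
Note that the static MultiFieldQueryParser.parse used above treats q and fields as parallel arrays, so each keyword is only matched against its corresponding field (and the two arrays must be the same length). If the goal is to search one keyword across all four fields at once, a MultiFieldQueryParser instance can be used instead; a minimal sketch (the class name SearchAllFields is just for illustration):

package test;

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

// Sketch: one keyword searched across all four fields at once (OR semantics).
public class SearchAllFields {
 public static void searchAll(IndexSearcher searcher, String keyword) throws IOException, ParseException {
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
  String[] fields = {"title", "description", "tag", "userName"};
  MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36, fields, analyzer);
  Query query = parser.parse(keyword);
  TopDocs topDocs = searcher.search(query, 100);
  System.out.println("\"" + keyword + "\" matched " + topDocs.totalHits + " documents");
 }
}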
 
TestDb.java is the main driver; its content is as follows:
package test;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Date;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
public class TestDb {
 public final static String indexDir ="E:\\TestLucene";
 private static Connection getConnection() {
  Connection conn = null;
  String url = "jdbc:mysql://localhost:3306/test";
  String userName = "root";
  String password = "root";
  try {
   Class.forName("com.mysql.jdbc.Driver");
   conn = java.sql.DriverManager
     .getConnection(url, userName, password);
  } catch (Exception e) {
   e.printStackTrace();
   System.out.println("Error Trace in getConnection() : "
     + e.getMessage());
  }
  return conn;
 }
 public static void main(String[] args) throws IOException, ParseException, SQLException {
  index(); // build the index from the database
  IndexReader reader = null;
  IndexSearcher searcher = null;
  try {
   reader = IndexReader.open(FSDirectory.open(new File(indexDir)));
   searcher = new IndexSearcher(reader);
   search(searcher); // run the search
  } catch (Exception e) {
   e.printStackTrace();
  } finally {
   if (searcher != null)
    searcher.close();
   if (reader != null)
    reader.close(); // closing the searcher does not close a reader passed to its constructor
  }
 }
 public static void search(IndexSearcher searcher) throws IOException, ParseException {
  // the search keywords; q[i] is matched against the i-th field listed in SearcherFile
  String[] q = {"美女1","美女2","好人3","好人5"};
  long start = new Date().getTime();
  SearcherFile.search(searcher, q);
  long end = new Date().getTime();
  System.out.println("Search took " + (double) (end - start) / 1000 + " seconds");
 }
 public static void index() throws SQLException{
  Connection conn = null;
  try {
   conn = getConnection();
   Photo[] list = Photo.loadPhotos(conn);
   IndexerFile.indexFile(indexDir,list);
  } catch (Exception e) {
   e.printStackTrace();
  } finally {
   if (conn != null) {
    conn.close();
   }
  }
 }
}
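
IndexerFile always rebuilds the whole index (OpenMode.CREATE). As a variation, if the photoId field were indexed as Field.Index.NOT_ANALYZED instead of Field.Index.NO, a single row could be re-indexed in place with IndexWriter.updateDocument. A rough sketch under that assumption (the class name IndexerUpdate and method updatePhoto are hypothetical):

package test;

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

// Sketch only: incremental re-indexing of one photo. Assumes "photoId" is indexed
// as NOT_ANALYZED (IndexerFile above uses Index.NO, which cannot serve as an update key).
public class IndexerUpdate {
 public static void updatePhoto(String indexDir, Photo photo) throws IOException {
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
  conf.setOpenMode(OpenMode.CREATE_OR_APPEND); // keep the existing index instead of rebuilding it
  IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDir)), conf);

  String id = String.valueOf(photo.getPhotoId());
  Document doc = new Document();
  doc.add(new Field("photoId", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
  if (photo.getTitle() != null && photo.getTitle().length() > 0)
   doc.add(new Field("title", photo.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
  // ... add description, userName and tag the same way as in IndexerFile ...

  // Deletes any document whose photoId term equals id, then adds the new version.
  writer.updateDocument(new Term("photoId", id), doc);
  writer.close();
 }
}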
 
Part 2: Searching the content of txt files with Lucene 3.6
 
Create a folder E:\TestLucene\fileS to hold the files to be searched, and create three txt files in it: "1.txt", "2.txt" and "3.txt".

1.txt contains, for example:

老周
北京人民
2009年

Put some arbitrary text into 2.txt and 3.txt as well.

Also create a folder E:\TestLucene\fileIndex to hold the index files.

The Java file TestQueryFile.java is as follows:

package test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.document.Field;
public class TestQueryFile {
 
      public static void main(String[] args) throws Exception {
        indexF();
        String queryString = "北京";
        Query query = null;
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("E:\\TestLucene\\fileIndex")));
        IndexSearcher searcher = new IndexSearcher(reader);
        String field = "body";
        try {
            // a single-field QueryParser (this is what differs from the database example)
            QueryParser qp = new QueryParser(Version.LUCENE_36, field, new StandardAnalyzer(Version.LUCENE_36));
            query = qp.parse(queryString);
        } catch (ParseException e) {
            e.printStackTrace();
            return;
        }
        TopDocs topDocs = searcher.search(query, 100); // 100 = maximum number of hits to return
        ScoreDoc[] hits = topDocs.scoreDocs;
        System.out.println("Index has " + searcher.maxDoc() + " documents; " + hits.length + " hits");
        searcher.close();
        reader.close();
      }
 
      private static void indexF() throws Exception {
         
           File fileDir = new File("E:\\TestLucene\\fileS");
 
         
           File indexDir = new File("E:\\TestLucene\\fileIndex");
        
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
         conf.setOpenMode(OpenMode.CREATE);
         IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);
       
           File[] textFiles = fileDir.listFiles();
           long startTime = new Date().getTime();
         
           // add a Document to the index for each txt file
           for (int i = 0; i < textFiles.length; i++) {
               if (textFiles[i].isFile()
                       && textFiles[i].getName().endsWith(".txt")) {
                   System.out.println("File " + textFiles[i].getCanonicalPath()
                           + "正在被索引....");
                   String temp = FileReaderAll(textFiles[i].getCanonicalPath(),
                           "GBK");
                   System.out.println(temp);
                   Document document = new Document();
                   Field FieldPath = new Field("path", textFiles[i].getPath(),
                           Field.Store.YES, Field.Index.NO);
                   Field FieldBody = new Field("body", temp, Field.Store.YES,
                           Field.Index.ANALYZED,
                           Field.TermVector.WITH_POSITIONS_OFFSETS);
                   document.add(FieldPath);
                   document.add(FieldBody);
                   indexWriter.addDocument(document);
             }
           }
           // forceMerge(1) merges the index down to a single segment (the old optimize())
           indexWriter.forceMerge(1);
           indexWriter.close();
         
           // report how long indexing took
           long endTime = new Date().getTime();
           System.out.println("It took " + (endTime - startTime)
                   + " ms to add the documents in " + fileDir.getPath() + " to the index");
       }
 
       /** Reads the whole file into one String using the given charset (line breaks are dropped). */
       private static String FileReaderAll(String FileName, String charset)
               throws IOException {
           BufferedReader reader = new BufferedReader(new InputStreamReader(
                   new FileInputStream(FileName), charset));
           String line = null;
     StringBuffer temp = new StringBuffer("");
         
           while ((line = reader.readLine()) != null) {
               temp.append(line);
           }
           reader.close();
           return temp.toString();
       }
}

Run it and you will see the result.
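
The search in TestQueryFile only prints the number of hits. To also see which files matched, the hit loop can read back the stored path field before the searcher is closed; for example, inside main():

        // after the hits array is obtained and before searcher.close()
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            System.out.println("Matched file: " + doc.get("path"));
        }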

