Lucene 4.5 简单实例

最新推荐文章于 2023-01-06 15:13:29 发布

南山行者

最新推荐文章于 2023-01-06 15:13:29 发布

阅读量713

点赞数

分类专栏： Java

Java 专栏收录该内容

12 篇文章 0 订阅

订阅专栏

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 4.x example: builds a full-text index (from hard-coded sample
 * documents or from the {@code hotel} database table) and runs a keyword
 * search against it, optionally with paging and hit highlighting.
 *
 * <p>Every document has two stored, analyzed text fields: {@code filename}
 * and {@code content}.
 */
public class Main {

    /** Filesystem location of the index shared by the indexing and search methods. */
    private static final String INDEX_PATH = "C://index";

    /** Name of the stored field holding the title / hotel name. */
    private static final String FIELD_FILENAME = "filename";

    /** Name of the stored field holding the body text / room type. */
    private static final String FIELD_CONTENT = "content";

    /** Upper bound on the number of hits fetched by {@link #searchIndex()}. */
    private static final int MAX_HITS = 25;

    /**
     * Rebuilds the index from the database and then runs the sample search.
     *
     * @param args unused
     * @throws InvalidTokenOffsetsException declared by the called methods
     * @throws ParseException if the sample keyword cannot be parsed
     * @throws IOException if the index cannot be written or read
     * @throws SQLException if the hotel rows cannot be read
     */
    public static void main(String[] args) throws InvalidTokenOffsetsException,
            ParseException, IOException, SQLException {
        createIndexForDatabase();
        searchIndex();
    }

    /**
     * Opens an {@link IndexWriter} on the given directory.
     *
     * @param dir directory that holds (or will hold) the index
     * @param analyzer analyzer used to tokenize the indexed fields
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be opened, e.g. because another
     *         writer already holds the directory's write lock
     */
    private static IndexWriter getIndexWriter(Directory dir, Analyzer analyzer)
            throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,
                analyzer);
        return new IndexWriter(dir, iwc);
    }

    /**
     * Closes the given writer; a {@code null} writer is ignored.
     *
     * @param indexWriter writer to close, may be {@code null}
     * @throws IOException if closing the writer fails
     */
    private static void closeWriter(IndexWriter indexWriter) throws IOException {
        if (indexWriter != null) {
            indexWriter.close();
        }
    }

    /**
     * Builds a document with the two fields used throughout this example.
     * A {@code null} value is stored as the empty string, because
     * {@link TextField} rejects {@code null} values.
     *
     * @param filename value of the {@code filename} field, may be {@code null}
     * @param content value of the {@code content} field, may be {@code null}
     * @return the new document
     */
    private static Document newDocument(String filename, String content) {
        Document document = new Document();
        document.add(new TextField(FIELD_FILENAME,
                filename == null ? "" : filename, Store.YES));
        document.add(new TextField(FIELD_CONTENT,
                content == null ? "" : content, Store.YES));
        return document;
    }

    /**
     * Creates the index from two hard-coded sample documents.
     *
     * <p>Exactly one writer is opened on the directory: a second writer on the
     * same directory cannot obtain the write lock while the first is open.
     *
     * @throws InvalidTokenOffsetsException never thrown; kept for interface
     *         compatibility
     * @throws SQLException never thrown; kept for interface compatibility
     * @throws IOException if the index cannot be opened, written or closed
     */
    public static void createIndexForDirectory() throws InvalidTokenOffsetsException,
            SQLException, IOException {
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriter indexWriter = getIndexWriter(directory, analyzer);
            try {
                indexWriter.addDocument(newDocument("标题:起点", "内容：我是一名程序员"));
                indexWriter.addDocument(newDocument("标题:终点", "内容：我不再只是程序员"));
                indexWriter.commit();
            } finally {
                // Always release the write lock, even when indexing failed.
                closeWriter(indexWriter);
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Creates the index from the rows of the {@code hotel} table: column
     * {@code name} becomes the {@code filename} field and column
     * {@code value1} becomes the {@code content} field.
     *
     * @throws InvalidTokenOffsetsException never thrown; kept for interface
     *         compatibility
     * @throws SQLException if the rows cannot be read
     * @throws IOException if the index cannot be opened, written or closed, or
     *         if obtaining the connection fails with another checked exception
     */
    public static void createIndexForDatabase() throws InvalidTokenOffsetsException,
            SQLException, IOException {
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriter indexWriter = getIndexWriter(directory, analyzer);
            try {
                indexHotels(indexWriter);
                indexWriter.commit();
            } finally {
                // Always release the write lock, even when indexing failed.
                closeWriter(indexWriter);
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Reads every row of the {@code hotel} table and adds one document per row
     * to the given writer. Does not commit.
     *
     * @param indexWriter open writer that receives the documents
     * @throws SQLException if the query fails
     * @throws IOException if a document cannot be added, or if obtaining the
     *         connection fails with another checked exception
     */
    private static void indexHotels(IndexWriter indexWriter)
            throws SQLException, IOException {
        Connection conn = null;
        PreparedStatement st = null;
        ResultSet rs = null;
        try {
            conn = ConnectionUtil.getConnection();
            st = conn.prepareStatement("SELECT * FROM hotel;");
            rs = st.executeQuery();
            // A fresh ResultSet is already positioned before the first row.
            // Calling beforeFirst() here would throw on the default
            // forward-only cursor, so the loop starts directly with next().
            while (rs.next()) {
                indexWriter.addDocument(
                        newDocument(rs.getString("name"), rs.getString("value1")));
            }
        } catch (SQLException e) {
            throw e;
        } catch (IOException e) {
            throw e;
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            // ConnectionUtil is declared elsewhere and may throw other checked
            // exceptions; surface them with their cause instead of hiding them.
            throw new IOException("Failed to read hotel rows for indexing", e);
        } finally {
            closeJdbc(rs, st, conn);
        }
    }

    /**
     * Closes the given JDBC resources in reverse order of acquisition. Each
     * resource is closed independently so that a failure closing one does not
     * leak the others; close failures are reported but not rethrown.
     *
     * @param rs result set to close, may be {@code null}
     * @param st statement to close, may be {@code null}
     * @param conn connection to close, may be {@code null}
     */
    private static void closeJdbc(ResultSet rs, PreparedStatement st,
            Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (st != null) {
            try {
                st.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Searches both fields of the index for the sample keyword and prints the
     * hit count followed by each returned hit.
     *
     * @throws ParseException if the keyword cannot be parsed into a query
     * @throws IOException if the index cannot be opened or read
     * @throws InvalidTokenOffsetsException never thrown; kept for interface
     *         compatibility
     */
    public static void searchIndex() throws ParseException, IOException,
            InvalidTokenOffsetsException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexReader ireader = IndexReader.open(directory);
            try {
                IndexSearcher isearcher = new IndexSearcher(ireader);
                String keyword = "单间";

                // Query both fields; every term of the keyword must match.
                String[] fields = { FIELD_FILENAME, FIELD_CONTENT };
                QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40,
                        fields, analyzer);
                qp.setDefaultOperator(QueryParser.AND_OPERATOR);
                Query query = qp.parse(keyword);

                // Fetch at most MAX_HITS of the best-scoring documents.
                TopDocs topDocs = isearcher.search(query, MAX_HITS);
                System.out.println("命中：" + topDocs.totalHits);

                // totalHits counts every match in the index, while scoreDocs
                // holds only the hits actually returned, so iterate scoreDocs.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document targetDoc = isearcher.doc(scoreDoc.doc);
                    System.out.println("酒店名称：" + targetDoc.get(FIELD_FILENAME));
                    System.out.println("房间类型：" + targetDoc.get(FIELD_CONTENT));
                }
                // For paging and highlighting, call
                // higherIndex(analyzer, isearcher, query, topDocs) here, while
                // the reader is still open.
            } finally {
                ireader.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Re-runs the query, prints one page of hits, and then prints the same
     * hits with the matched terms wrapped in red {@code <font>} tags.
     *
     * @param analyzer analyzer used to re-tokenize the stored text
     * @param isearcher searcher over the open index
     * @param query query whose terms are highlighted
     * @param topDocs result of a previous search; only its {@code totalHits}
     *        is used, to size the collector
     * @throws IOException if the index cannot be read
     * @throws InvalidTokenOffsetsException if a token offset lies outside the
     *         highlighted text
     */
    public static void higherIndex(Analyzer analyzer, IndexSearcher isearcher,
            Query query, TopDocs topDocs) throws IOException,
            InvalidTokenOffsetsException {
        if (topDocs.totalHits <= 0) {
            // The collector factory rejects a non-positive size, and there is
            // nothing to page through anyway.
            return;
        }
        TopScoreDocCollector results = TopScoreDocCollector.create(
                topDocs.totalHits, false);
        isearcher.search(query, results);
        // Page of results: (start offset, page size). The start offset is
        // zero-based, so this page begins at the second-best hit.
        ScoreDoc[] docs = results.topDocs(1, 2).scoreDocs;
        for (int i = 0; i < docs.length; i++) {
            Document targetDoc = isearcher.doc(docs[i].doc);
            System.out.println("内容：" + targetDoc.toString());
        }
        // Requires the lucene-highlighter module on the classpath.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                "<font color='red'>", "</font>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                new QueryScorer(query));
        for (int i = 0; i < docs.length; i++) {
            Document doc = isearcher.doc(docs[i].doc);
            String title = highlight(highlighter, analyzer, FIELD_FILENAME,
                    doc.get(FIELD_FILENAME));
            String content = highlight(highlighter, analyzer, FIELD_CONTENT,
                    doc.get(FIELD_CONTENT));
            System.out.println(doc.get(FIELD_FILENAME) + " : " + title + " : "
                    + content);
        }
    }

    /**
     * Returns the best highlighted fragment of {@code text}, or {@code text}
     * itself when no query term occurs in it (the highlighter yields
     * {@code null} in that case).
     *
     * @param highlighter configured highlighter
     * @param analyzer analyzer used to tokenize {@code text}
     * @param field name of the field the text came from
     * @param text stored field value, may be {@code null}
     * @return highlighted fragment, the plain text, or {@code null} if
     *         {@code text} is {@code null}
     * @throws IOException if tokenizing fails
     * @throws InvalidTokenOffsetsException if a token offset lies outside
     *         {@code text}
     */
    private static String highlight(Highlighter highlighter, Analyzer analyzer,
            String field, String text) throws IOException,
            InvalidTokenOffsetsException {
        if (text == null) {
            return null;
        }
        TokenStream tokenStream = analyzer.tokenStream(field,
                new StringReader(text));
        String fragment = highlighter.getBestFragment(tokenStream, text);
        return fragment != null ? fragment : text;
    }

    /**
     * Intentionally empty. NOTE(review): looks like an unfinished placeholder;
     * kept because it is public and may be referenced elsewhere.
     */
    public static void contectSq() {

    }
}

南山行者

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene4.5简单实例

import java.io.File;import java.io.IOException;import java.io.StringReader;import java.sql.Connection;import java.sql.PreparedStatement;import java.sql.ResultSet;import java.sql.SQLException;…
复制链接

扫一扫