// Lucene Chinese word segmentation — in-memory (RAM) index demo using IKAnalyzer

//package org.wltea.analyzer.sample; 
 
import java.io.IOException; 
 
import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; 
import org.apache.lucene.index.CorruptIndexException; 
import org.apache.lucene.index.IndexReader; 
import org.apache.lucene.index.IndexWriter; 
import org.apache.lucene.index.IndexWriterConfig; 
import org.apache.lucene.index.IndexWriterConfig.OpenMode; 
import org.apache.lucene.queryParser.ParseException; 
import org.apache.lucene.queryParser.QueryParser; 
import org.apache.lucene.search.IndexSearcher; 
import org.apache.lucene.search.Query; 
import org.apache.lucene.search.ScoreDoc; 
import org.apache.lucene.search.TopDocs; 
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.LockObtainFailedException; 
import org.apache.lucene.store.RAMDirectory; 
import org.apache.lucene.util.Version; 
import org.wltea.analyzer.lucene.IKAnalyzer; 
 

public class IKAnalyzerDemo { 

  /**
   * Demonstrates Lucene 3.4 with the IK Chinese analyzer: builds an in-memory
   * ({@link RAMDirectory}) index containing a single document, then parses a
   * Chinese keyword query with {@link QueryParser} (AND semantics), searches
   * for the top 5 hits, and prints them to stdout.
   *
   * @param args unused
   */
  public static void main(String[] args){ 
    // Name of the Document field that holds the analyzed, searchable text
    String fieldName = "text"; 
    // Content to be indexed
    String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。"; 

    // IK Chinese-segmentation analyzer, shared between indexing and querying
    // so that query terms are tokenized the same way as the indexed text.
    Analyzer analyzer = new IKAnalyzer(); 

    Directory directory = null; 
    IndexWriter iwriter = null; 
    IndexReader ireader = null; 
    IndexSearcher isearcher = null; 
    try { 
      // In-memory index backing store
      directory = new RAMDirectory(); 

      // Configure and open the index writer
      IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer); 
      iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); 
      iwriter = new IndexWriter(directory, iwConfig); 

      // Index one document: an un-analyzed ID field plus the analyzed text field
      Document doc = new Document(); 
      doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED)); 
      doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); 
      iwriter.addDocument(doc); 
      iwriter.close(); 
      iwriter = null; // mark as closed so the finally block does not close it twice

      // Searching ********************************** 
      ireader = IndexReader.open(directory); 
      isearcher = new IndexSearcher(ireader); 

      String keyword = "中文分词工具包"; 
      // Build the Query via QueryParser; require all terms (AND operator)
      QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer); 
      qp.setDefaultOperator(QueryParser.AND_OPERATOR); 
      Query query = qp.parse(keyword); 

      // Fetch the 5 highest-scoring hits
      TopDocs topDocs = isearcher.search(query, 5); 
      System.out.println("命中:" + topDocs.totalHits); 
      ScoreDoc[] scoreDocs = topDocs.scoreDocs; 
      // BUGFIX: bound the loop by scoreDocs.length, not totalHits — totalHits
      // counts ALL matches, while scoreDocs holds at most the 5 requested, so
      // the original loop could throw ArrayIndexOutOfBoundsException.
      for (int i = 0; i < scoreDocs.length; i++){ 
        Document targetDoc = isearcher.doc(scoreDocs[i].doc); 
        System.out.println("内容:" + targetDoc.toString()); 
      } 

    } catch (CorruptIndexException e) { 
      e.printStackTrace(); 
    } catch (LockObtainFailedException e) { 
      e.printStackTrace(); 
    } catch (IOException e) { 
      e.printStackTrace(); 
    } catch (ParseException e) { 
      e.printStackTrace(); 
    } finally { 
      // BUGFIX: also release the writer (leaked by the original whenever an
      // exception fired before iwriter.close()) and the searcher.
      if(iwriter != null){ 
        try { 
          iwriter.close(); 
        } catch (IOException e) { 
          e.printStackTrace(); 
        } 
      } 
      if(isearcher != null){ 
        try { 
          isearcher.close(); 
        } catch (IOException e) { 
          e.printStackTrace(); 
        } 
      } 
      if(ireader != null){ 
        try { 
          ireader.close(); 
        } catch (IOException e) { 
          e.printStackTrace(); 
        } 
      } 
      if(directory != null){ 
        try { 
          directory.close(); 
        } catch (IOException e) { 
          e.printStackTrace(); 
        } 
      } 
    } 
  } 
} 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值