lucene + IKAnalyzer 中文分词及索引,简单实例

  1. import org.apache.lucene.document.Document;   
  2. import org.apache.lucene.document.Field;   
  3. import org.apache.lucene.document.DateTools;   
  4. import org.apache.lucene.index.IndexWriter;   
  5. import org.apache.lucene.queryParser.QueryParser;   
  6. import org.apache.lucene.search.Hits;   
  7. import org.apache.lucene.search.IndexSearcher;   
  8. import org.apache.lucene.search.Query;   
  9. import org.apache.lucene.store.Directory;   
  10. import org.apache.lucene.store.FSDirectory;   
  11. import org.mira.lucene.analysis.IK_CAnalyzer;   
  12.   
  13. import java.io.File;   
  14. import java.io.FileNotFoundException;   
  15. import java.io.IOException;   
  16. import java.io.FileReader;   
  17. import java.util.Date;   
  18.   
  19. public class Searcher {   
  20.     private static String INDEX_DIR = Searcher.class.getResource("/").getPath()+"/data/index";//"c://lucene//index";   
  21.     private static String DOC_DIR =   Searcher.class.getResource("/").getPath();//"c://lucene//doc";   
  22.     public static void main(String[] args) throws Exception {   
  23.          String queryString;   
  24.          queryString = "测试";   
  25.          File indexDir = new File(INDEX_DIR);   
  26.          File docDir = new File(DOC_DIR);   
  27.          Date start = new Date();   
  28.         //必须先建索引   
  29.         try {   
  30.              IndexWriter writer = new IndexWriter(INDEX_DIR, new IK_CAnalyzer(), true);   
  31.              System.out.println("Indexing to directory '" + INDEX_DIR + "'...");   
  32.              indexDocs(writer, docDir);   
  33.              System.out.println("Optimizing...");   
  34.              writer.optimize();   
  35.              writer.close();   
  36.              Date end = new Date();   
  37.              System.out.println(end.getTime() - start.getTime() + " total milliseconds");   
  38.   
  39.          } catch (IOException e) {   
  40.              System.out.println(" caught a " + e.getClass() +   
  41.                     "/n with message: " + e.getMessage());   
  42.          }   
  43.         if (!indexDir.exists() || !indexDir.isDirectory()) {   
  44.             throw new Exception(indexDir   
  45.                      + " does not exist or is not a directory.");   
  46.          }   
  47.          search(indexDir, queryString);   
  48.      }   
  49.   
  50.     private static void indexDocs(IndexWriter writer, File file)   
  51.             throws IOException {   
  52.         if (file.canRead()) {   
  53.             if (file.isDirectory()) {   
  54.                  String[] files = file.list();   
  55.                 if (files != null) {   
  56.                     for (int i = 0; i < files.length; i++) {   
  57.                          indexDocs(writer, new File(file, files[i]));   
  58.                      }   
  59.                  }   
  60.              } else {   
  61.                  System.out.println("adding " + file);   
  62.                 try {   
  63.                      writer.addDocument(getDocument(file));   
  64.                  }   
  65.                 catch (FileNotFoundException fnfe) {   
  66.                     //   
  67.                  }   
  68.              }   
  69.          }   
  70.      }   
  71.   
  72.     private static Document getDocument(File f)   
  73.             throws java.io.FileNotFoundException {   
  74.          Document doc = new Document();   
  75.          doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));   
  76.          doc.add(new Field("modified",   
  77.                  DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),   
  78.                  Field.Store.YES, Field.Index.UN_TOKENIZED));   
  79.          doc.add(new Field("contents", new FileReader(f)));   
  80.         return doc;   
  81.      }   
  82.   
  83.     public static void search(File indexDir, String q) throws Exception {   
  84.          Directory fsDir = FSDirectory.getDirectory(indexDir);   
  85.          IndexSearcher is = new IndexSearcher(fsDir);// ① 打开索引   
  86.          Query query = new QueryParser("contents", new IK_CAnalyzer()).parse(q); // ② 分析查询   
  87.         long start = new Date().getTime();   
  88.          Hits hits = is.search(query);// ③ 搜索索引   
  89.         long end = new Date().getTime();   
  90.          System.err.println("Found " + hits.length() + " document(s) (in "  
  91.                  + (end - start) + "milliseconds) that matched query" + q + ":");   
  92.         for (int i = 0; i < hits.length(); i++) {   
  93.              Document doc = hits.doc(i); // ④ 得到匹配的文档   
  94.              System.out.println("file: " + doc.get("path"));   
  95.          }   
  96.      }   
  97. }   
    正向全切分分词器:org.mira.lucene.analysis.IK_CAnalyzer(适合建索引时使用)

    正向最大全切分分词器:org.mira.lucene.analysis.MIK_CAnalyzer(适合用户输入检索时使用)

来源:http://hi.baidu.com/happy19840402/blog/item/7f48ce2e462aff554fc226d6.html

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值