Lucene

名茶

于 2013-04-17 10:27:30 发布

阅读量488

点赞数

分类专栏： Lucene 文章标签： Lucene

Lucene 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

使用Lucene时，首先要建立索引，然后才能进行搜索。本文使用的是最新的lucene-4.2.1包。

1.Indexer.java

[java]view plaincopy 
   
 /** 
  * 索引器 
  * @author shishengjie 
  * 
  */  
 public class Indexer {  
       
     private IndexWriter writer;//写索引，负责创建索引或打开已有索引等等  
   
     public Indexer(String indexDir) throws IOException {  
         //Directory描述索引存放的位置  
         Directory dir = FSDirectory.open(new File(indexDir));  
         //分析器，文本文件在被索引之前需要经过Analyzer处理  
         Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_42);  
         IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42,  
                 luceneAnalyzer);  
         writer = new IndexWriter(dir, config);//创建写索引  
   
     }  
   
     public void close() throws IOException {  
         writer.close();  
     }  
     /** 
      * 文件过滤器 过滤所有非.txt文件 
      * @author shishengjie 
      * 
      */  
     private static class TextFilesFilter implements FileFilter {  
   
         @Override  
         public boolean accept(File pathname) {  
             // TODO Auto-generated method stub  
             return pathname.getName().toLowerCase().endsWith(".txt");  
         }  
   
     }  
     /** 
      * 建立索引 
      * @param dataDir 
      * @param filter 
      * @return 
      * @throws IOException 
      */  
     public int index(String dataDir, FileFilter filter) throws IOException {  
         File files[] = new File(dataDir).listFiles();//索引文件夹下所有文件  
         for (File f : files) {  
             if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()  
                     && (filter == null || filter.accept(f))) {  
                 indexFile(f);//将文件加入所索引  
             }  
         }  
         return writer.numDocs();  
     }  
   
     /** 
      * 向索引中添加文档 
      * @param f 
      * @throws IOException 
      */  
     private void indexFile(File f) throws IOException {  
         System.out.println("Indexing " + f.getCanonicalPath());  
         //根据文件名获取文档，Document代表一些域Filed的集合  
         Document doc = getDocument(f);  
         //加入到索引中  
         writer.addDocument(doc);  
     }  
   
     /** 
      * 根据文件返回Document 
      * @param f 
      * @return 
      * @throws IOException 
      */  
     private Document getDocument(File f) throws IOException {  
         Document doc = new Document();//创建文档  
         doc.add(new Field("contents", new FileReader(f)));  
         doc.add(new Field("filename", f.getName(), Field.Store.YES,  
                 Field.Index.NOT_ANALYZED));  
         doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,  
                 Field.Index.NOT_ANALYZED));//添加域Filed  
         return doc;  
     }  
   
     public static void main(String[] args) {  
         try {  
             if (args.length != 2) {  
                 throw new IllegalAccessException("Usage:java "  
                         + Indexer.class.getName() + " <index dir> <data dir>");  
             }  
   
             String indexDir = args[0];  //此处存放索引文件  
             String dataDir = args[1];   //对该文件夹下的文件建立索引  
   
             long start = System.currentTimeMillis();  
             //创建索引，indexDir为索引存放位置  
             Indexer indexer = new Indexer(indexDir);  
             int numIndexed;  
             try {  
                 //对dataDir文件夹下的文件建立索引  
                 numIndexed = indexer.index(dataDir, new TextFilesFilter());  
             } finally {  
                 indexer.close();  
             }  
             long end = System.currentTimeMillis();  
             System.out.println("Indexing " + numIndexed + " files took "  
                     + (end - start) + " milliseconds");  
         } catch (Exception e) {  
             e.printStackTrace();  
         }  
     }  
   
 }  

运行时需要输入2个参数，第一个为要存放索引的文件夹，第二个为要索引哪个文件夹下的文件

如：C:\Users\shishengjie\Desktop\lucene\indexDir C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\

将会扫描C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\文件夹下的txt文件，为其建立索引，创建的索引存放在C:\Users\shishengjie\Desktop\lucene\indexDir下面

输出为：

[plain]view plaincopy 
   
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\CHANGES.txt  
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\JRE_VERSION_MIGRATION.txt  
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\LICENSE.txt  
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\MIGRATE.txt  
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\NOTICE.txt  
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\README.txt  
 Indexing C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\SYSTEM_REQUIREMENTS.txt  
 Indexing 21 files took 2479 milliseconds  

2.Searcher.java

[java]view plaincopy 
   
 /** 
  * 查找器 
  * @author shishengjie 
  * 
  */  
 public class Searcher {  
   
     public static void main(String[] args) throws Exception {  
   
         if (args.length != 2) {  
             throw new IllegalAccessException("Usage:java "  
                     + Searcher.class.getName() + " <index dir> <data dir>");  
         }  
         String indexDir = args[0];//索引文件夹  
         String q = args[1];//要检索的字符  
         search(indexDir, q);//查找  
     }  
   
     /** 
      * 查找 
      * @param indexDir 
      * @param q 
      * @throws Exception 
      */  
     private static void search(String indexDir, String q) throws Exception {  
         //索引存放的位置处创建Directory  
         Directory dir = FSDirectory.open(new File(indexDir));  
         // 读取索引的indexReader  
         IndexReader indexReader = IndexReader.open(dir);  
         // 创建indexSearcher，用于搜索由IndexWriter类创建的索引  
         IndexSearcher is = new IndexSearcher(indexReader);  
         //解析查询字符串  
         QueryParser parser = new QueryParser(Version.LUCENE_42, "contents",  
                 new StandardAnalyzer(Version.LUCENE_42));  
         //将人可读的查询解析为Query  
         Query query = parser.parse(q);  
         long start = System.currentTimeMillis();  
         //查询，以TopDocs对象的形式返回搜索结果集  
         //TopDocs是一个简单的指针容器，指向前N个排名的搜索结果  
         //TopDocs只包括对于文档的引用IndexSearcher.doc时才加载  
         TopDocs hits = is.search(query, 10);  
         long end = System.currentTimeMillis();  
         System.err.println("Found " + hits.totalHits + " document(s) (in "  
                 + +(end - start) + " milliseconds) that matched query '" + q  
                 + "'");  
         //输出匹配的文本  
         for (ScoreDoc scoreDoc : hits.scoreDocs) {  
             Document doc = is.doc(scoreDoc.doc);//返回匹配文本  
             System.out.println(doc.get("fullpath"));  
         }  
   
     }  
   
 }  

运行时需要输入2个参数，第一个为要存放索引的文件夹，第二个为要查询的单词

如：C:\Users\shishengjie\Desktop\lucene\indexDir java

将会读取索引，查找内容中包含java的文件

输出为：

[plain]view plaincopy 
   
 Found 6 document(s) (in 191 milliseconds) that matched query 'java'  
 C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\JRE_VERSION_MIGRATION.txt  
 C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\SYSTEM_REQUIREMENTS.txt  
 C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\README.txt  
 C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\NOTICE.txt  
 C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\LICENSE.txt  
 C:\Users\shishengjie\Desktop\lucene\lucene-4.2.1\CHANGES.txt  

名茶

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Lucene

Lucene首先要建立索引，才能进行搜索,使用了最新的lucene-4.2.1包1.Indexer.java[java] view plaincopy/** * 索引器 * @author shishengjie * */ public class Indexer { private
复制链接

扫一扫