Lucene

Open-source component: Lucene 2.1

Full-text search: select * from t1 where contains(name,'abc or test')

The LIKE equivalent: select * from t1 where name like '%abc%' or name like '%test%'

A query that uses LIKE with a leading wildcard ('%abc%') cannot use an index even if
one exists on the column; the database falls back to a heap scan and checks row by row.

A full-text index allows such fuzzy queries to be answered through an index.

Lucene uses full-text indexing; by default only the first 100 matching records are shown.

Lucene is an Apache project (originally part of the Jakarta project).

Lucene is written in Java, using only the core Java API.

Goal: search all text files under a given directory for occurrences of a keyword.

Data directory: holds the .txt text files, e.g. g:/1/test

Index directory: holds the generated index files (there may be several); the index
must be built explicitly, e.g. g:/1/index

Document: a Lucene class, comparable to a record (row) in a database table

Field: a field of a Document, comparable to a column

Hits: the equivalent of a result set in a database

IndexSearcher: runs the query and returns the Hits result set
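
To see how these classes fit together, here is a minimal sketch against the Lucene 2.1 API used in the examples below; the class name QuickSearchSketch is only illustrative, and it assumes an index already exists at g:/1/index with a stored "path" field and an indexed "contents" field (exactly what the indexing example later in these notes creates):

package haha;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

// minimal sketch: open an existing index, run a TermQuery, walk the Hits
public class QuickSearchSketch {
 public static void main(String[] args) throws Exception {
  IndexSearcher searcher = new IndexSearcher("g:/1/index");   // the index directory
  Hits hits = searcher.search(new TermQuery(new Term("contents", "jsp")));
  for (int i = 0; i < hits.length(); i++) {
   Document doc = hits.doc(i);              // one Document per matching file
   System.out.println(doc.get("path"));     // read back a stored Field
  }
  searcher.close();
 }
}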
-----------------------------------------------------------------
Lucene usage examples:

Using StringTokenizer:
package test2;

import java.util.StringTokenizer;

public class Test1 {

 public static void main(String[] args) {
  // TODO Auto-generated method stub
  String delimiters = "( ) , .";   // delimiter characters: '(' ')' ',' '.' and space
  StringTokenizer st = new StringTokenizer("This is an example! (javatest) \njsp,test,abc.123", delimiters);
  while (st.hasMoreTokens()){
   System.out.println(st.nextToken());
  }
 }

}

-------------------------------------------------

package test2;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.StringTokenizer;

public class Test2 {

 /**
  * @param args
  */
 public static void main(String[] args) throws Exception {
  // TODO Auto-generated method stub
  System.out.print("请输入要统计的文件路径:");
  String filePath = new BufferedReader(new InputStreamReader(System.in)).readLine();
  System.out.print("请输入要统计的关键字");
  String keyword = new BufferedReader(new InputStreamReader(System.in)).readLine();
  String content = "";
  BufferedReader br = new BufferedReader(new FileReader(filePath));
  String message = "";
  message = br.readLine();
  int count = 0;
  while (message != null){
   content += message + "\n";
   message = br.readLine();
  }
  System.out.println(content);
  StringTokenizer st = new StringTokenizer(content);
  while (st.hasMoreTokens()){
   if (st.nextToken().equals(keyword)){
    count++;
   }
  }
  System.out.println("在" + filePath + "中共有" + keyword + count + "个");
  br.close();
 }

}
---------------------------------------------------------
Using Lucene

Generating the index...
package haha;

import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

 

/**
 * This class demonstrates the process of creating an index
 * with Lucene for text files
 */
public class LucenceIndex {
 public static void main(String[] args) throws Exception{
  //indexDir is the directory that hosts Lucene's index files
        File   indexDir = new File("g:/1/index");//directory where the index files are stored
        //dataDir is the directory that hosts the text files that to be indexed
        File   dataDir  = new File("g:/1/test"); //files under this folder will be indexed
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        File[] dataFiles  = dataDir.listFiles();
        IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);
        long startTime = new Date().getTime();
  //loop over the data files and index each one
        for(int i = 0; i < dataFiles.length; i++){
         if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){
          System.out.println("Indexing file " + dataFiles[i].getCanonicalPath());
          Document document = new Document();
          Reader txtReader = new FileReader(dataFiles[i]);
         
   //create two fields: a path field (named "path") and a content field (named "contents")
          document.add(new Field("path",dataFiles[i].getPath(),Field.Store.YES,Field.Index.NO));
          document.add(new Field("contents",txtReader));
        
          indexWriter.addDocument(document);
         }
        }
        indexWriter.optimize();
        indexWriter.close();
        long endTime = new Date().getTime();
       
        System.out.println("It takes " + (endTime - startTime)
                           + " milliseconds to create index for the files in directory "
                     + dataDir.getPath());       
 }
}


-----------------------------------------------------------------------
Querying the index...
package haha;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

/**
 * This class is used to demonstrate the
 * process of searching on an existing
 * Lucene index
 *
 */
public class LucenceSearch {
 public static void main(String[] args) throws Exception{
     String queryStr = "jsp";
     //This is the directory that hosts the Lucene index
        File indexDir = new File("g:/1/index");
        if(!indexDir.exists()){
         System.out.println("The Lucene index does not exist");
         return;
        }
        FSDirectory directory = FSDirectory.getDirectory(indexDir,false);
        IndexSearcher searcher = new IndexSearcher(directory);

  //query the contents field; the query string is "jsp"
        Term term = new Term("contents",queryStr.toLowerCase());
        TermQuery luceneQuery = new TermQuery(term);
        Hits hits = searcher.search(luceneQuery);
        System.out.println("Number of hits: " + hits.length());
        for(int i = 0; i < hits.length(); i++){
         Document document = hits.doc(i);
         System.out.println("File: " + document.get("path"));
   //the line above prints the stored path field of each match;
   //the "contents" field was indexed from a Reader and is not stored,
   //so re-open the file via its stored path to show the first line of content
   BufferedReader br = new BufferedReader(new FileReader(document.get("path")));
   System.out.println("Content: " + br.readLine());
   br.close();
        }
 }
}

====================================================================

package test3 ;

public class Constants
{
 public final static String DATA_DIR = "g:/1x/2";
 //data directory

 public final static String INDEX_DIR = "g:/1x/idex2";
 //index directory
}

-----------------

package test3 ;

import java.io.File;
import java.io.FileReader;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

public class LuceneIndex
{
 public static void main(String[] args) throws Exception
 {
  String dataDir = Constants.DATA_DIR ;//data directory
  String indexDir = Constants.INDEX_DIR ;//index directory

  File fileDataDir = new File(dataDir) ;
  File[] fileList = fileDataDir.listFiles() ;

  //Index files are generated through an IndexWriter, which needs an
   //index directory and an analyzer to be constructed
   Analyzer analyzer = new StandardAnalyzer() ;
   IndexWriter indexWriter = new IndexWriter(indexDir,analyzer,true) ;
   //only Document objects can be added to an IndexWriter
  for(int i = 0 ; i < fileList.length ; i ++)
  {
   Document doc = new Document() ;
   Reader reader = new FileReader(fileList[i]);
   doc.add(new Field("path",fileList[i].getPath(),Field.Store.YES,Field.Index.NO));
   doc.add(new Field("content",reader));
   indexWriter.addDocument(doc);
  }
  indexWriter.optimize();
  indexWriter.close();//must close the writer, otherwise the index files are not written out
 }
}

Then the query:

package test3 ;

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

public class LuceneSearch
{
 public static void main(String[] args) throws Exception
 {
  String indexDir = Constants.INDEX_DIR ;
  String queryString = "java" ;
  File file = new File(indexDir) ;
  FSDirectory dir = FSDirectory.getDirectory(file);
  //file-system directory for the index

  //to run a query, call IndexSearcher's search method
  Term term = new Term("contents",queryString.toLowerCase());
  TermQuery query = new TermQuery(term) ;
  IndexSearcher search = new IndexSearcher(dir) ;
  Hits hits = search.search(query);

  for(int i = 0 ; i < hits.length() ; i ++)
  {
   Document doc = hits.doc(i);
   System.out.println(doc.get("path"));
  }
 }
}

==============================================

package test3;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.Reader;
import java.util.Properties;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;


public class H1 {

 

 

  /**
   * Builds the Lucene index; directory paths are read from /pro.txt on the classpath
   */
  public  void init() throws Exception {
   // TODO Auto-generated method stub
   //Constants.init();
   Properties pro = new Properties();
   InputStream is = this.getClass().getResourceAsStream("/pro.txt");
   pro.load(is);
   String dataDir = pro.getProperty("DATA_DIR");
   System.out.println(dataDir);
  String indexDir = pro.getProperty("INDEX_DIR");
   File fileDataDir = new File(dataDir);
   File fileIndexDir = new File(indexDir);
   if (!fileDataDir.exists()){
    fileDataDir.mkdirs();
   }
   if (!(fileIndexDir.exists())){
    fileIndexDir.mkdirs();
   }
   File[] fileList = fileDataDir.listFiles();
   Analyzer analyzer = new StandardAnalyzer();
   IndexWriter indexWriter = new IndexWriter(indexDir,analyzer,true);
   for (int i = 0; i < fileList.length; i++){
    Document doc = new Document();
    Reader reader = new FileReader(fileList[i]);
    doc.add(new Field("path",fileList[i].getPath(),Field.Store.YES,Field.Index.NO));
    doc.add(new Field("contents",reader));
    indexWriter.addDocument(doc);
   }
   indexWriter.optimize();
   indexWriter.close();
  }

 

}
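
H1 reads DATA_DIR and INDEX_DIR from a pro.txt properties file loaded from the classpath root (H2 below does the same); a minimal example of what that file might contain, reusing the directories from earlier as placeholder values:

# pro.txt, placed at the classpath root (e.g. the source folder)
DATA_DIR=g:/1/test
INDEX_DIR=g:/1/index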
==================================================


package test3;

import java.io.File;
import java.io.InputStream;
import java.util.Properties;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

public class H2 {

 


  /**
   * @param queryString the keyword to search for
   * @return the stored path of every matching file, separated by <br>
   */
  public  String getPath(String queryString) throws Exception{
   String content = "";
   Properties pro = new Properties();
   InputStream is = this.getClass().getResourceAsStream("/pro.txt");
   pro.load(is);
   // TODO Auto-generated method stub
   String indexDir = pro.getProperty("INDEX_DIR");
   
   File file = new File(indexDir);
   FSDirectory dir = FSDirectory.getDirectory(file);
   Term term = new Term("contents",queryString.toLowerCase());
   TermQuery query = new TermQuery(term);
   IndexSearcher search = new IndexSearcher(dir);
   Hits hits = search.search(query);
   for (int i = 0; i < hits.length(); i++){
    Document doc = hits.doc(i);
    System.out.println(doc.get("path"));
    content += doc.get("path") + "<br>";
   }
   return content;
   
  }


}
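
A small driver showing how the two classes might be used together; the class name Main and the keyword "java" are only illustrative:

package test3;

public class Main {
 public static void main(String[] args) throws Exception {
  new H1().init();                          // build the index from DATA_DIR
  String paths = new H2().getPath("java");  // search the index for the keyword
  System.out.println(paths);                // matching file paths, separated by <br>
 }
}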
 
