开源的组件:Lucene 2.1
全文搜索:select * from t1 where contains(name,'abc or test')
select * from t1 where name like '%abc%' or name like '%test%'
只要查询语句使用以通配符开头的like条件(如 '%abc%'),在做查询时通常就无法使用普通索引,即使表中已经
建立了索引,使用的也是全表扫描(堆扫描),逐行查找
全文索引允许对模糊查询使用索引
Lucene使用全文索引,默认只显示符合条件的前100条记录
Lucene是Apache的开源项目,最初属于Jakarta项目组
Lucene是用Java开发的,只使用了Java核心API
实现检索特定目录下的所有文本文件中出现某个单词(keyword)
目录:保存的是文本文件 .txt g:/1/test
生成索引目录: 保存索引文件,有可能有多个索引文件,索引文件需要手工生成 g:/1/index
Document 是Lucene自带的类,类似数据库表中的一条记录
Field : 字段
Hits:相当于数据库中的结果集
Hits结果集由IndexSearcher的search方法返回
-----------------------------------------------------------------
Lucene使用例子:
StringTokenizer的用法:
package test2;
import java.util.StringTokenizer;
/**
 * Small demonstration of {@link StringTokenizer} with a custom delimiter set.
 */
public class Test1 {
    /**
     * Splits a fixed sample sentence on the characters of the delimiter string
     * and prints each resulting token on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Every character of this string (parentheses, space, comma, dot) acts as a delimiter.
        final String delimiters = "( ) , .";
        final String sample = "This is an example! (javatest) /njsp,test,abc.123";

        for (StringTokenizer tokens = new StringTokenizer(sample, delimiters);
                tokens.hasMoreTokens();) {
            String token = tokens.nextToken();
            System.out.println(token);
        }
    }
}
-------------------------------------------------
package test2;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.StringTokenizer;
public class Test2 {
/**
* @param args
*/
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
System.out.print("请输入要统计的文件路径:");
String filePath = new BufferedReader(new InputStreamReader(System.in)).readLine();
System.out.print("请输入要统计的关键字");
String keyword = new BufferedReader(new InputStreamReader(System.in)).readLine();
String content = "";
BufferedReader br = new BufferedReader(new FileReader(filePath));
String message = "";
message = br.readLine();
int count = 0;
while (message != null){
content += message + "/n";
message = br.readLine();
}
System.out.println(content);
StringTokenizer st = new StringTokenizer(content);
while (st.hasMoreTokens()){
if (st.nextToken().equals(keyword)){
count++;
}
}
System.out.println("在" + filePath + "中共有" + keyword + count + "个");
br.close();
}
}
---------------------------------------------------------
Lucene的使用
生成索引……
package haha;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Demonstrates the process of creating a Lucene index for text files.
 *
 * <p>Each regular file in the data directory whose name ends with {@code .txt}
 * becomes one document with two fields: {@code path}, holding the file path, and
 * {@code contents}, built from a {@code Reader} over the file.
 */
public class LucenceIndex {
    /**
     * Indexes the {@code .txt} files of the data directory and prints how long it took.
     *
     * @param args unused
     * @throws Exception if the data directory cannot be listed or indexing fails
     */
    public static void main(String[] args) throws Exception {
        // indexDir is the directory that hosts Lucene's index files.
        File indexDir = new File("g:/1/index");
        // dataDir is the directory that hosts the text files to be indexed.
        File dataDir = new File("g:/1/test");

        // listFiles() returns null when dataDir is missing or not a directory. Check
        // before the writer is opened so a bad path fails with a clear message.
        File[] dataFiles = dataDir.listFiles();
        if (dataFiles == null) {
            throw new java.io.FileNotFoundException(
                    "Cannot list data directory: " + dataDir.getPath());
        }

        Analyzer luceneAnalyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
        long startTime = new Date().getTime();
        try {
            // Add one document per text file.
            for (int i = 0; i < dataFiles.length; i++) {
                File dataFile = dataFiles[i];
                if (!dataFile.isFile() || !dataFile.getName().endsWith(".txt")) {
                    continue;
                }
                System.out.println("Indexing file " + dataFile.getCanonicalPath());
                Reader txtReader = new FileReader(dataFile);
                try {
                    Document document = new Document();
                    // Two fields: the file path ("path") and the file text ("contents").
                    document.add(new Field("path", dataFile.getPath(),
                            Field.Store.YES, Field.Index.NO));
                    document.add(new Field("contents", txtReader));
                    indexWriter.addDocument(document);
                } finally {
                    // Release the file handle even when adding the document fails.
                    txtReader.close();
                }
            }
            indexWriter.optimize();
        } finally {
            // Always close the writer so it does not stay open after a failure.
            indexWriter.close();
        }
        long endTime = new Date().getTime();
        System.out.println("It takes " + (endTime - startTime)
                + " milliseconds to create index for the files in directory "
                + dataDir.getPath());
    }
}
-----------------------------------------------------------------------
做索引查询……
package haha;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Demonstrates the process of searching an existing Lucene index.
 *
 * <p>Runs a single-term query against the {@code contents} field and prints, for
 * every hit, the {@code path} field together with the first line of that file.
 */
public class LucenceSearch {
    /**
     * Searches the index for the term {@code jsp} and prints the matching files.
     *
     * @param args unused
     * @throws Exception if the index or one of the matching files cannot be read
     */
    public static void main(String[] args) throws Exception {
        String queryStr = "jsp";
        // This is the directory that hosts the Lucene index.
        File indexDir = new File("g:/1/index");
        // Check for the index before trying to open it.
        if (!indexDir.exists()) {
            System.out.println("The Lucene index is not exist");
            return;
        }
        FSDirectory directory = FSDirectory.getDirectory(indexDir, false);
        IndexSearcher searcher = new IndexSearcher(directory);
        try {
            // Query the contents field. The term is lower-cased, presumably because the
            // index was built with StandardAnalyzer — confirm against the indexing code.
            Term term = new Term("contents", queryStr.toLowerCase());
            TermQuery luceneQuery = new TermQuery(term);
            Hits hits = searcher.search(luceneQuery);
            System.out.println("his result is" + hits.length());
            for (int i = 0; i < hits.length(); i++) {
                Document document = hits.doc(i);
                // Print the path field of the matching document.
                String path = document.get("path");
                System.out.println("File: " + path);
                // Document.get returns a String, not a Reader, and a field built from a
                // Reader is not stored in the index; read the text from the file instead.
                System.out.println("Content: " + readFirstLine(path));
            }
        } finally {
            searcher.close();
        }
    }

    /** Returns the first line of the file at {@code path}, or null if the file is empty. */
    private static String readFirstLine(String path) throws Exception {
        BufferedReader br = new BufferedReader(new FileReader(path));
        try {
            return br.readLine();
        } finally {
            br.close();
        }
    }
}
====================================================================
package test3 ;
/**
 * Directory settings shared by the indexing and searching examples.
 */
public class Constants
{
    /** Data directory: holds the files to be indexed. */
    public final static String DATA_DIR = "g:/1x/2";

    /** Index directory: holds the generated index files. */
    public final static String INDEX_DIR = "g:/1x/idex2";
}
-----------------
package test3 ;
/**
 * Builds a Lucene index for the files in {@code Constants.DATA_DIR} and writes it
 * to {@code Constants.INDEX_DIR}.
 *
 * <p>NOTE(review): this listing shows no import statements. To compile it needs
 * {@code java.io.File}, {@code java.io.FileReader}, {@code java.io.Reader} and the
 * Lucene {@code Analyzer}, {@code StandardAnalyzer}, {@code Document}, {@code Field}
 * and {@code IndexWriter} classes imported.
 */
public class LuceneIndex
{
    /**
     * Adds one document per regular file in the data directory, with a {@code path}
     * field and a {@code contents} field.
     *
     * @param args unused
     * @throws Exception if the data directory cannot be listed or indexing fails
     */
    public static void main(String[] args) throws Exception
    {
        String dataDir = Constants.DATA_DIR;   // data directory
        String indexDir = Constants.INDEX_DIR; // index directory
        File fileDataDir = new File(dataDir);
        // listFiles() returns null when the path is missing or not a directory.
        File[] fileList = fileDataDir.listFiles();
        if (fileList == null)
        {
            throw new IllegalStateException("Cannot list data directory: " + dataDir);
        }
        // Index files are produced by an IndexWriter, which needs a directory
        // and an analyzer to be constructed.
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(indexDir, analyzer, true);
        try
        {
            // Only Document objects can be added to an IndexWriter.
            for (int i = 0; i < fileList.length; i++)
            {
                // Skip sub-directories: a FileReader cannot be opened on them.
                if (!fileList[i].isFile())
                {
                    continue;
                }
                Reader reader = new FileReader(fileList[i]);
                try
                {
                    Document doc = new Document();
                    doc.add(new Field("path", fileList[i].getPath(),
                            Field.Store.YES, Field.Index.NO));
                    // Named "contents" because the companion LuceneSearch listing
                    // queries the "contents" field.
                    doc.add(new Field("contents", reader));
                    indexWriter.addDocument(doc);
                }
                finally
                {
                    reader.close();
                }
            }
            indexWriter.optimize();
        }
        finally
        {
            // Author's note: the writer must be closed, otherwise the index is not generated.
            indexWriter.close();
        }
    }
}
然后是查询
package test3 ;
/**
 * Searches the index in {@code Constants.INDEX_DIR} for a fixed term and prints the
 * {@code path} field of every matching document.
 *
 * <p>NOTE(review): this listing shows no import statements. To compile it needs
 * {@code java.io.File} and the Lucene {@code FSDirectory}, {@code Term},
 * {@code TermQuery}, {@code IndexSearcher}, {@code Hits} and {@code Document}
 * classes imported.
 */
public class LuceneSearch
{
    /**
     * Runs a single-term query for {@code java} against the {@code contents} field.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception
    {
        String indexDir = Constants.INDEX_DIR;
        String queryString = "java";
        File file = new File(indexDir);
        // File-system directory that holds the index.
        FSDirectory dir = FSDirectory.getDirectory(file);
        // Searching is done through IndexSearcher.search.
        Term term = new Term("contents", queryString.toLowerCase());
        TermQuery query = new TermQuery(term);
        IndexSearcher search = new IndexSearcher(dir);
        try
        {
            Hits hits = search.search(query);
            // length() is a method on Hits, not a field.
            for (int i = 0; i < hits.length(); i++)
            {
                Document doc = hits.doc(i);
                System.out.println(doc.get("path"));
            }
        }
        finally
        {
            search.close();
        }
    }
}
==============================================
package test3;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.Reader;
import java.util.Properties;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Builds a Lucene index using the directories configured in the classpath
 * resource {@code /pro.txt}.
 */
public class H1 {
    /**
     * Reads {@code DATA_DIR} and {@code INDEX_DIR} from {@code /pro.txt}, creates
     * either directory if it does not exist, and indexes every regular file found
     * in the data directory with a {@code path} and a {@code contents} field.
     *
     * @throws IllegalStateException if {@code /pro.txt} is missing, lacks one of the
     *         two properties, or the data directory cannot be listed
     * @throws Exception if reading the configuration or writing the index fails
     */
    public void init() throws Exception {
        Properties pro = new Properties();
        InputStream is = this.getClass().getResourceAsStream("/pro.txt");
        // getResourceAsStream returns null when the resource is not on the classpath.
        if (is == null) {
            throw new IllegalStateException("Classpath resource /pro.txt not found");
        }
        try {
            pro.load(is);
        } finally {
            is.close();
        }

        String dataDir = pro.getProperty("DATA_DIR");
        System.out.println(dataDir);
        String indexDir = pro.getProperty("INDEX_DIR");
        if (dataDir == null || indexDir == null) {
            throw new IllegalStateException("/pro.txt must define DATA_DIR and INDEX_DIR");
        }

        File fileDataDir = new File(dataDir);
        File fileIndexDir = new File(indexDir);
        if (!fileDataDir.exists()) {
            fileDataDir.mkdirs();
        }
        if (!fileIndexDir.exists()) {
            fileIndexDir.mkdirs();
        }

        // listFiles() returns null when the path is not a listable directory.
        File[] fileList = fileDataDir.listFiles();
        if (fileList == null) {
            throw new IllegalStateException("Cannot list data directory: " + dataDir);
        }

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(indexDir, analyzer, true);
        try {
            for (int i = 0; i < fileList.length; i++) {
                // Skip sub-directories: a FileReader cannot be opened on them.
                if (!fileList[i].isFile()) {
                    continue;
                }
                Reader reader = new FileReader(fileList[i]);
                try {
                    Document doc = new Document();
                    doc.add(new Field("path", fileList[i].getPath(),
                            Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("contents", reader));
                    indexWriter.addDocument(doc);
                } finally {
                    reader.close();
                }
            }
            indexWriter.optimize();
        } finally {
            // Close the writer even when indexing fails part-way.
            indexWriter.close();
        }
    }
}
==================================================
package test3;
import java.io.File;
import java.io.InputStream;
import java.util.Properties;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Searches the Lucene index whose location is configured in the classpath
 * resource {@code /pro.txt}.
 */
public class H2 {
    /**
     * Runs a single-term query against the {@code contents} field and collects the
     * {@code path} field of every matching document.
     *
     * @param queryString the term to look for; it is lower-cased before the search
     * @return the matching paths, each followed by {@code <br>}; empty when nothing matches
     * @throws IllegalStateException if {@code /pro.txt} is missing or does not define
     *         {@code INDEX_DIR}
     * @throws Exception if the configuration or the index cannot be read
     */
    public String getPath(String queryString) throws Exception {
        Properties pro = new Properties();
        InputStream is = this.getClass().getResourceAsStream("/pro.txt");
        // getResourceAsStream returns null when the resource is not on the classpath.
        if (is == null) {
            throw new IllegalStateException("Classpath resource /pro.txt not found");
        }
        try {
            pro.load(is);
        } finally {
            is.close();
        }

        String indexDir = pro.getProperty("INDEX_DIR");
        if (indexDir == null) {
            throw new IllegalStateException("/pro.txt must define INDEX_DIR");
        }

        File file = new File(indexDir);
        FSDirectory dir = FSDirectory.getDirectory(file);
        Term term = new Term("contents", queryString.toLowerCase());
        TermQuery query = new TermQuery(term);
        IndexSearcher search = new IndexSearcher(dir);
        StringBuilder content = new StringBuilder();
        try {
            Hits hits = search.search(query);
            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                String path = doc.get("path");
                System.out.println(path);
                content.append(path).append("<br>");
            }
        } finally {
            // Close the searcher once the hits have been read.
            search.close();
        }
        return content.toString();
    }
}