Index:
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Demonstrates how to build a Lucene index from the plain-text files
 * found directly inside a data directory.
 *
 * <p>Each regular file whose name ends in {@code .txt} becomes one document with
 * two fields: {@code path} (the canonical path, stored and indexed un-tokenized)
 * and {@code contents} (the file's text, supplied to Lucene through a reader).
 */
public class TxtFileIndex {

    /**
     * Rebuilds the index from scratch and prints how long the indexing took.
     *
     * <p>The index and data locations are hard-coded. If the data directory cannot
     * be listed, a message is printed and the existing index is left untouched.
     *
     * @param args ignored
     * @throws Exception if reading a data file or writing the index fails
     */
    public static void main(String[] args) throws Exception {
        // Location the index is written to.
        File indexDir = new File("D:\\luceneIndex");
        // Location the text files are read from.
        File dataDir = new File("D:\\luceneData");

        // listFiles() returns null when dataDir is missing or is not a directory.
        // Check this BEFORE opening the writer: the writer is opened in "create"
        // mode, so opening it first would replace the old index and then crash.
        File[] dataFiles = dataDir.listFiles();
        if (dataFiles == null) {
            System.out.println("Data directory " + dataDir.getPath()
                    + " does not exist or is not a directory");
            return;
        }

        // Analyzer used to tokenize the "contents" field.
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        // IndexWriter adds documents to the index; the third argument (true)
        // asks for a new index to be created at indexDir.
        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
        // Start of the timed section.
        long startTime = new Date().getTime();
        try {
            for (int i = 0; i < dataFiles.length; i++) {
                File dataFile = dataFiles[i];
                // Only regular files with a .txt suffix are indexed.
                if (!dataFile.isFile() || !dataFile.getName().endsWith(".txt")) {
                    continue;
                }
                String canonicalPath = dataFile.getCanonicalPath();
                System.out.println("Indexing file " + canonicalPath);

                // NOTE(review): FileReader decodes with the platform default
                // charset; confirm that matches the encoding of the data files.
                Reader txtReader = new FileReader(dataFile);
                try {
                    Document document = new Document();
                    // Stored so a search hit can report which file matched;
                    // un-tokenized so the whole path is a single term.
                    document.add(new Field("path", canonicalPath,
                            Field.Store.YES, Field.Index.UN_TOKENIZED));
                    // Body text, read from the file when the document is added.
                    document.add(new Field("contents", txtReader));
                    indexWriter.addDocument(document);
                } finally {
                    // Release the file handle even if adding the document failed.
                    txtReader.close();
                }
            }
            indexWriter.optimize();
        } finally {
            // Always close the writer so it is not left open after a failure.
            indexWriter.close();
        }
        long endTime = new Date().getTime();
        // Report the elapsed time.
        System.out.println("It takes " + (endTime - startTime)
                + " milliseconds to create index for the files in directory "
                + dataDir.getPath());
    }
}
Search:
import java.io.File;
//import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Demonstrates searching an existing Lucene index, using either a wildcard
 * query or an exact term query against the {@code contents} field.
 */
public class TxtFileSearcher {

    /**
     * Runs the wildcard search demo against the hard-coded index directory.
     *
     * <p>If the index directory does not exist, a message is printed and no
     * search is attempted.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened or searched
     */
    public static void main(String[] args) throws Exception {
        // Logger logger=Logger.getLogger(TxtFileSearcher.class);
        // Location of the index to search.
        String indexPath = "D:\\luceneIndex";
        // Check the location before any searcher is opened on it.
        File indexDir = new File(indexPath);
        if (!indexDir.exists()) {
            System.out.println("The Lucene index does not exist: " + indexPath);
            return;
        }
        testFuzzySearch(indexPath);
    }

    /**
     * Searches the {@code contents} field for one exact term and prints the
     * stored {@code path} of every matching document.
     *
     * @param indexDirectory path of the directory holding the index
     * @param queryStr the term to look for; it is lower-cased before searching
     * @throws Exception if the index cannot be opened or searched
     */
    public static void testTermSearch(String indexDirectory, String queryStr) throws Exception {
        Directory directory = FSDirectory.getDirectory(indexDirectory, false);
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // A TermQuery is matched as-is, so the term is lower-cased here.
                // NOTE(review): this assumes the index was built with an analyzer
                // that lower-cases tokens (e.g. StandardAnalyzer) - confirm.
                Term term = new Term("contents", queryStr.toLowerCase());
                TermQuery luceneQuery = new TermQuery(term);
                Hits hits = searcher.search(luceneQuery);
                for (int i = 0; i < hits.length(); i++) {
                    // Fetch the matching document and print its stored path.
                    Document document = hits.doc(i);
                    System.out.println("File: " + document.get("path"));
                }
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Runs a fixed set of wildcard patterns against the {@code contents} field
     * and prints the number of hits for each.
     *
     * <p>Despite its name, this method builds {@code WildcardQuery} instances,
     * not {@code FuzzyQuery}; the name is kept so existing callers still work.
     *
     * @param indexDirectory path of the directory holding the index
     * @throws Exception if the index cannot be opened or searched
     */
    public static void testFuzzySearch(String indexDirectory) throws Exception {
        Directory dir = FSDirectory.getDirectory(indexDirectory, false);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                // NOTE(review): the first pattern starts with a wildcard, which
                // may be slow on a large index - confirm this is acceptable.
                String[] searchWords = {"*12?", "456"};
                for (int i = 0; i < searchWords.length; i++) {
                    Query query = new WildcardQuery(new Term("contents", searchWords[i]));
                    Hits results = indexSearcher.search(query);
                    // Leading space added: the count used to run into the text.
                    System.out.println(results.length()
                            + " search results for query " + searchWords[i]);
                }
            } finally {
                // Release the searcher even if a query failed.
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
上面的搜索示例（testFuzzySearch 方法）虽然名字里有 Fuzzy，实际使用的是通配符查询（WildcardQuery）：`*` 匹配任意多个字符，`?` 匹配单个字符。