目录如下:
d:\\index 存放索引文件
D:\testfolder 存放大文件分割好的小文件(很多)
d:/book.txt 存放原始文件
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
public class TestQuery {
public static void main(String[] args) throws IOException, ParseException {
Hits hits = null ;
String queryString = "把原来可能是军心涣散的溃退变成一场精神抖擞的胜利进军.进军到战略要地西北去,无疑是他们大战役的第二个基本原因,他们正确地预见到这个地区要对中,日,苏的当前命运将起决定性的作用.后来的历史证明,他们强调这个原因是完" ;
Query query = null ;
IndexSearcher searcher = new IndexSearcher( "d:\\index" );
Analyzer analyzer = new StandardAnalyzer();
try {
QueryParser qp = new QueryParser( "body" , analyzer);
query = qp.parse(queryString);
} catch (ParseException e) {
}
if (searcher != null ) {
hits = searcher.search(query);
if (hits.length() > 0 ) {
System.out.println( " 找到: " + hits.length() + " 个结果! " );
// System.out.println(searcher.getIndexReader().document( ));
}
for(int i=0; i<hits.length(); i++){
System.out.println(hits.id(i));
}
}
}
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/** */ /**
* author lighter date 2006-8-7
*/
public class TextFileIndexer {
public static void main(String[] args) throws Exception {
/**/ /* 指明要索引文件夹的位置,这里是C盘的S文件夹下 */
File fileDir = new File( "d:\\testfolder" );
/**/ /* 这里放索引文件的位置 */
File indexDir = new File( "d:\\index" );
Analyzer luceneAnalyzer = new StandardAnalyzer();
IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer, true);
File[] textFiles = fileDir.listFiles();
long startTime = new Date().getTime();
// 增加document到索引去
for ( int i = 0 ; i < textFiles.length; i ++ ) {
if (textFiles[i].isFile()
&& textFiles[i].getName().endsWith( ".txt" )) {
System.out.println( " File " + textFiles[i].getCanonicalPath()
+ " 正在被索引.... " );
String temp = FileReaderAll(textFiles[i].getCanonicalPath(),
"GBK" );
System.out.println(temp);
Document document = new Document();
Field FieldPath = new Field( "path" , textFiles[i].getPath(),
Field.Store.YES, Field.Index.NO);
Field FieldBody = new Field( "body" , temp, Field.Store.YES,
Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS);
document.add(FieldPath);
document.add(FieldBody);
indexWriter.addDocument(document);
}
}
// optimize()方法是对索引进行优化
indexWriter.optimize();
indexWriter.close();
// 测试一下索引的时间
long endTime = new Date().getTime();
System.out
.println( " 这花费了 "
+ (endTime - startTime)
+ " 毫秒来把文档增加到索引里面去! "
+ fileDir.getPath());
}
public static String FileReaderAll(String FileName, String charset)
throws IOException {
BufferedReader reader = new BufferedReader( new InputStreamReader(
new FileInputStream(FileName), charset));
String line = new String();
String temp = new String();
while ((line = reader.readLine()) != null ) {
temp += line;
}
reader.close();
return temp;
}
}