import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index over the {@code .txt} files of a directory tree.
 * Written against the Lucene 3.0 API.
 *
 * @author wawa
 */
public class IndexTest {

    /**
     * Indexes a hard-coded data directory into a hard-coded index directory and
     * prints the number of indexed documents together with the elapsed time.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Directory in which the index is stored.
        String indexStorePath = "D:\\java\\lucene\\indexCh2";
        // Directory containing the files to be indexed.
        String input = "D:\\java\\lucene\\zhuxian";
        try {
            long start = System.currentTimeMillis();
            int docNum = createIndex(new File(indexStorePath), new File(input));
            long end = System.currentTimeMillis();
            System.out.println("Indexing " + docNum + " files took " + (end - start) + " milliseconds");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates an index in {@code indexDir} from the {@code .txt} files found
     * under {@code dataDir}.
     *
     * @param indexDir directory that receives the index
     * @param dataDir  root directory of the files to index
     * @return the document count reported by the writer after indexing
     * @throws Exception if opening the index or indexing a file fails
     */
    public static int createIndex(File indexDir, File dataDir) throws Exception {
        // Lucene 3.0 style construction: Directory + Analyzer + create flag + max field length.
        IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir),
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexDirectory(writer, dataDir);
            int numIndexed = writer.numDocs();
            writer.optimize();
            return numIndexed;
        } finally {
            // Always close the writer, even when indexing fails part-way through.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dir} and indexes every {@code .txt} file in it.
     * A directory whose contents cannot be listed is skipped.
     *
     * @param writer writer that receives the documents
     * @param dir    directory to traverse
     * @throws IOException if indexing a file fails
     */
    private static void indexDirectory(IndexWriter writer, File dir)
            throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when dir is not a directory or an I/O error occurs.
            System.err.println("Skipping unreadable directory " + dir);
            return;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                indexDirectory(writer, f); // recurse
            } else if (f.getName().endsWith(".txt")) {
                indexFile(writer, f);
            }
        }
    }

    /**
     * Indexes a single text file. Hidden, missing or unreadable files are skipped.
     *
     * @param writer writer that receives the document
     * @param f      file to index
     * @throws IOException if reading the file or adding the document fails
     */
    private static void indexFile(IndexWriter writer, File f)
            throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        // NOTE(review): FileReader decodes with the platform default charset — confirm
        // this matches the encoding of the text files being indexed.
        FileReader reader = new FileReader(f);
        try {
            Document doc = new Document();
            // Reader-valued field: the content is tokenized and indexed from the stream.
            doc.add(new Field("contents", reader));
            // The canonical path is stored so it can be read back from search hits.
            doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        } finally {
            // Release the file handle even if addDocument fails; closing an
            // already-closed reader has no effect.
            reader.close();
        }
    }
}
import java.io.File;
import java.io.FileReader;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a keyword query against an existing Lucene index.
 * Written against the Lucene 3.0 API.
 */
public class SearchTest {

    /**
     * Searches a hard-coded index directory for a hard-coded keyword.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String storePath = "D:\\java\\lucene\\indexCh2\\";
        String keys = "林惊羽";
        try {
            createSearch(new File(storePath), keys);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code keys} as a query on the {@code contents} field, runs it
     * against the index in {@code storPath} and prints the hits and a summary.
     *
     * @param storPath directory containing the index
     * @param keys     query text handed to the query parser
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void createSearch(File storPath, String keys) throws Exception {
        // Second argument 'true' opens the index read-only.
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(storPath), true);
        try {
            String field = "contents";
            QueryParser parser = new QueryParser(Version.LUCENE_30, field,
                    new StandardAnalyzer(Version.LUCENE_30));
            Query query = parser.parse(keys);
            // Collector created for up to 100 top-scoring hits.
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);

            long start = System.currentTimeMillis();
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println(hits.length);
            for (ScoreDoc hit : hits) {
                // Load the stored document for this hit by its document id.
                Document doc = searcher.doc(hit.doc);
                // Prints the Field object's string form (not only its value), followed by the hit.
                System.out.println(doc.getField("filename") + " " + hit.toString() + " ");
            }
            // The measured interval covers the search and the printing of the hits.
            long end = System.currentTimeMillis();
            System.out.println("Found :" + collector.getTotalHits() +
                    " document(s) (in " + (end - start) +
                    " milliseconds) that matched query '" +
                    keys + "':");
        } finally {
            // Release the searcher's index resources regardless of the outcome.
            searcher.close();
        }
    }
}