最近在学习Lucene。Lucene是一个开放源代码的全文检索引擎工具包。我在网上找了一些小例子,无奈网上的资料大部分是针对2.0版本的,而我下载的Lucene版本是3.6.2,由于很多API已经变了,这些例子程序都会报错。于是我去官网查了Lucene 3.6的API文档,又查阅了网上的一些资料,终于把问题解决了。在这里贴上一个Lucene 3.6的比较简单的例子,希望可以帮到像我一样处于初学阶段的人。
Lucene的使用主要分为两部分:一部分是建立索引文件,另一部分是根据索引文件进行搜索查询。
先来看看建立索引的部分:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene 3.6 index from the {@code .txt} files found in a source folder.
 *
 * <p>Each file becomes one document with two fields: {@code path} (stored, not indexed)
 * and {@code content} (stored and analyzed with {@link StandardAnalyzer}).
 */
public class LuceneTest {
    /** Character encoding used to decode the files that are indexed. */
    private static final String ENCODE = "GBK";

    /**
     * Reads a text file from local disk and returns its content.
     *
     * <p>Lines are joined with {@code '\n'}, and a trailing {@code '\n'} follows the last line.
     *
     * @param szFileName the file to read; {@code null} yields an empty string
     * @return the decoded file content, or {@code ""} if the file could not be read
     */
    public static String openFile(File szFileName) {
        if (szFileName == null) {
            return "";
        }
        FileInputStream in = null;
        try {
            in = new FileInputStream(szFileName);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODE));
            // StringBuilder avoids the quadratic cost of repeated String concatenation.
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
            return content.toString();
        } catch (IOException e) {
            // Best effort: callers expect "" on failure, but the cause should not vanish silently.
            System.err.println("Failed to read " + szFileName + ": " + e);
            return "";
        } finally {
            // Closing the underlying stream releases the file handle even if reading failed.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Builds the index: every {@code .txt} file directly inside the source folder is added
     * as one document.
     *
     * @throws IOException if the source folder cannot be listed or the index cannot be written
     */
    public void test() throws IOException {
        // Folder that holds the index.
        File indexDir = new File("G:/Workspaces/Eclipse/LuceneTest/indexdata");
        // Folder that holds the files to be indexed.
        File sourceDir = new File("G:/Workspaces/Eclipse/Heritrix/jobs/360BuyIndexDatabase");

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] textFiles = sourceDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in " + sourceDir
                    + " (not a directory or not readable)");
        }

        FSDirectory directory = FSDirectory.open(indexDir);
        try {
            IndexWriterConfig conf =
                    new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
            IndexWriter writer = new IndexWriter(directory, conf);
            try {
                for (File textFile : textFiles) {
                    if (textFile.isFile() && textFile.getName().endsWith(".txt")) {
                        indexFile(writer, textFile);
                    }
                }
            } finally {
                // The index is only written to disk once the writer is closed; closing in
                // finally also makes sure the writer is not left open if indexing fails.
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Adds one text file to the index as a document with "path" and "content" fields. */
    private static void indexFile(IndexWriter writer, File textFile) throws IOException {
        System.out.println("File" + textFile.getCanonicalPath() + "正在被索引");
        String szContent = openFile(textFile);
        System.out.println(szContent);

        Document doc = new Document();
        // The path is only stored for display; it is not searchable.
        doc.add(new Field("path", textFile.getPath(), Store.YES, Index.NO));
        // The content is analyzed for full-text search and also stored in the index.
        doc.add(new Field("content", szContent, Store.YES, Index.ANALYZED));
        writer.addDocument(doc);
    }

    public static void main(String[] args) throws IOException {
        new LuceneTest().test();
    }
}
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.Query;
/**
 * Runs a sample full-text query against the index folder and prints the stored
 * {@code content} field of the best matches.
 */
public class LuceneSearch {
    /**
     * Searches the {@code content} field for a fixed query string and prints the content of
     * up to five top-scoring documents, each followed by the time taken to load it.
     *
     * <p>A query that cannot be parsed is reported on standard error; nothing is printed
     * to standard output in that case.
     *
     * @throws IOException if the index cannot be opened or read
     */
    public void Search() throws IOException {
        // Folder that holds the index.
        File indexDir = new File("G:/Workspaces/Eclipse/LuceneTest/indexdata");
        FSDirectory directory = FSDirectory.open(indexDir);
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    printTopMatches(searcher);
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed explicitly here in addition to the searcher.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Parses the sample query, runs it, and prints each hit's content and load time. */
    private static void printTopMatches(IndexSearcher searcher) throws IOException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        QueryParser parser = new QueryParser(Version.LUCENE_36, "content", analyzer);

        Query query;
        try {
            query = parser.parse("程序"); // text to search for
        } catch (ParseException e) {
            // Report the malformed query instead of silently printing nothing.
            System.err.println("Could not parse query: " + e.getMessage());
            return;
        }

        // The five best-matching documents.
        ScoreDoc[] docs = searcher.search(query, 5).scoreDocs;
        for (ScoreDoc hit : docs) {
            long start = System.currentTimeMillis();
            String content = searcher.doc(hit.doc).get("content");
            long elapsed = System.currentTimeMillis() - start;
            System.out.println(content + elapsed + "ms");
        }
    }

    public static void main(String[] args) throws IOException {
        new LuceneSearch().Search();
    }
}
搜索结果: