参考官网: http://lucene.apache.org/core/5_3_1/demo/overview-summary.html#overview_description
package example;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
//http://lucene.apache.org/core/5_3_1/demo/overview-summary.html#overview_description
/**
 * Command-line demo that searches an existing Lucene index in two ways:
 * first with a {@link QueryParser}-built query (the query text is run through
 * an analyzer), then with a hand-built {@link TermQuery} (no analysis at all).
 *
 * <p>For every hit, the stored "path" and "modified" fields are printed to
 * standard output.
 */
public class FileSearch {

    /** Directory that hosts the Lucene index. */
    private static final String INDEX_PATH = "E:\\LuceneIndex";

    /** Name of the indexed field that both demo queries search. */
    private static final String QUERY_FIELD = "contents";

    /**
     * Runs both demo searches against the index in {@link #INDEX_PATH}.
     *
     * <p>If the index directory is not readable, a message is printed and the
     * JVM exits with status 1.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened or read, or if the query
     *                   text cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        final Path indexDir = Paths.get(INDEX_PATH);
        if (!Files.isReadable(indexDir)) {
            System.out.println("Lucene index directory '" + indexDir.toAbsolutePath() + "' does not exist or is not readable");
            System.exit(1);
        }

        // try-with-resources closes the analyzer, reader and directory (in that
        // order) even when parsing or searching throws.
        try (FSDirectory directory = FSDirectory.open(indexDir);
             IndexReader reader = DirectoryReader.open(directory);
             Analyzer analyzer = new StandardAnalyzer()) {

            // IndexSearcher searches an already-built index. It only opens the
            // index read-only, so several IndexSearcher instances may operate
            // on the same index.
            IndexSearcher searcher = new IndexSearcher(reader);

            // Case 1: QueryParser.
            // The parser is constructed with an analyzer so that the query text
            // is interpreted the same way the documents were: finding word
            // boundaries, downcasing, and removing useless words like 'a', 'an'
            // and 'the'. The parser just decodes Lucene query syntax into the
            // corresponding Query object; a Query can also be built
            // programmatically (see case 2).
            QueryParser parser = new QueryParser(QUERY_FIELD, analyzer);

            // Query is an abstract class with several implementations (e.g.
            // TermQuery, BooleanQuery, PrefixQuery); it wraps the user's query
            // string in a form Lucene understands and is handed to the searcher.
            Query query = parser.parse("mao yang a the other Michael Jordan");

            // The printed query shows that stop words such as 'a' and 'the'
            // really have been removed.
            System.out.println("Searching for: " + query.toString(QUERY_FIELD));

            // IndexSearcher.search(query, n) returns TopDocs with at most n hits.
            printHits(searcher, searcher.search(query, 5));

            // Case 2: hand-built TermQuery.
            System.out.println("\n\nanother case:");
            String queryStr = "morning";

            // A Term is the basic unit of search and consists of two strings:
            // the field to search in and the keyword to look for. Without
            // QueryParser the keyword is NOT analyzed in any way, so a
            // mixed-case keyword such as "MAO Hang" would find nothing; hence
            // the explicit lower-casing. Locale.ROOT keeps the result
            // independent of the JVM's default locale.
            Term term = new Term(QUERY_FIELD, queryStr.toLowerCase(java.util.Locale.ROOT));

            // TermQuery is the most basic Query subclass; its constructor takes
            // just the Term to match.
            TermQuery termQuery = new TermQuery(term);
            printHits(searcher, searcher.search(termQuery, 2));
        }
    }

    /**
     * Prints the stored "path" and "modified" fields of every hit.
     *
     * @param searcher searcher used to load each hit's stored document
     * @param results  search results whose hits are printed in order
     * @throws java.io.IOException if a stored document cannot be loaded
     */
    private static void printHits(IndexSearcher searcher, TopDocs results) throws java.io.IOException {
        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            System.out.println("File path: " + doc.get("path"));
            // Per the original author's note this prints "null", because
            // FileIndex.java indexes the field with Field.Store.NO.
            System.out.println("File lastModified: " + doc.get("modified"));
        }
    }
}
参考文献:
[1] Mendes, Pablo N, Jakob, Max, García-Silva, Andrés, et al. DBpedia spotlight: Shedding light on the web of documents[C]// Proceedings of the 7th International Conference on Semantic Systems. ACM, 2011:1-8.
[2] Han X, Sun L. A Generative Entity-Mention Model for Linking Entities with Knowledge Base[J]. Proceedings of ACL, 2011:945-954.
[4] http://alias-i.com/lingpipe/demos/tutorial/ne/read-me.html
[5] http://wiki.dbpedia.org/Downloads2014
[6] http://www.oschina.net/p/jieba (结巴分词)