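PrefixQuery 类似于数据库中的 like 'a%' 查询,只能从 term 的开头进行匹配。
为了让它也能匹配中间部分(类似 like '%a%'),下面的测试代码把拼音的每一个后缀都作为一个单独的 term 写入索引,例如用前缀 "ghai" 就可以命中 "shanghai"。这种方式的缺陷是人为地分出了更多的 term,目前还在找有没有更优的方法。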
测试代码:
// Filesystem directory of the Lucene index; createPrefixIndex() writes it and main() reads it.
public static String index_path3="D:\\ix3";
/**
 * Runs a prefix query for "ghai" on the "pinyin" field of the index stored at
 * {@code index_path3} and prints the stored "hotelName" of each of the top 100 hits.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the index cannot be opened, searched or closed
 */
public static void main(String[]args) throws Exception{
    // createPrefixIndex(); // uncomment to (re)build the index before searching
    // Prefix query
    Query trquery = new PrefixQuery(new Term("pinyin","ghai" ));
    IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(index_path3)));
    try {
        Searcher searcher = new IndexSearcher(indexReader);
        try {
            TopDocs topDocs = searcher.search(trquery,100);
            for (ScoreDoc hit : topDocs.scoreDocs){
                Document doc = searcher.doc(hit.doc);
                System.out.println("doc = "+doc.get("hotelName"));
            }
        } finally {
            searcher.close();
        }
    } finally {
        // The reader was opened here, so it is released here even when the search fails.
        indexReader.close();
    }
}
/**
 * Creates the pinyin index at {@code index_path3}, replacing any existing index there.
 *
 * <p>Four sample documents are written. For each one, every suffix of its pinyin string
 * is indexed as a separate un-analyzed "pinyin" term, so that a {@code PrefixQuery} can
 * match text starting at any position of the original string.
 *
 * <p>Failures are printed to standard error and not rethrown. The writer is closed in
 * all cases.
 */
public static void createPrefixIndex(){
    Analyzer analyzer = new IKAnalyzer();
    PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(analyzer);
    // Per-field override kept for reference (disabled):
    // perFieldAnalyzerWrapper.addAnalyzer("price", new WhitespaceAnalyzer());
    IndexWriter writer = null;
    try {
        writer = new IndexWriter(FSDirectory.open(new File(index_path3)), perFieldAnalyzerWrapper, true,IndexWriter.MaxFieldLength.LIMITED);
        addPinyinDocument(writer, "test2", "shanghai");
        addPinyinDocument(writer, "test3", "beijing");
        addPinyinDocument(writer, "test6", "zhongguo_benxi");
        addPinyinDocument(writer, "test1", "tianjin");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close even after a failure; otherwise the writer stays open for the rest of the run.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Adds one document holding an analyzed "hotelName" field and one un-analyzed "pinyin"
 * field per suffix of {@code pinyin} (the full string first, the last character last).
 * All fields are stored.
 *
 * @param writer    open index writer that receives the document
 * @param hotelName value of the "hotelName" field
 * @param pinyin    pinyin string whose suffixes are indexed
 * @throws Exception if the writer fails to add the document
 */
private static void addPinyinDocument(IndexWriter writer, String hotelName, String pinyin) throws Exception {
    Document doc = new Document();
    doc.add(new Field("hotelName", hotelName, Field.Store.YES, Field.Index.ANALYZED));
    int len = pinyin.length();
    // Store every suffix of the pinyin string as its own term.
    for (int i = 0; i < len; i++) {
        String value = pinyin.substring(i, len);
        doc.add(new Field("pinyin", value, Field.Store.YES, Field.Index.NOT_ANALYZED));
    }
    writer.addDocument(doc);
}
以下是测试代码,这种方式的缺陷是人为地分出了更多的 term,目前还在找有没有更优的方法。
测试代码:
// Filesystem directory of the Lucene index; createPrefixIndex() writes it and main() reads it.
public static String index_path3="D:\\ix3";
/**
 * Runs a prefix query for "ghai" on the "pinyin" field of the index stored at
 * {@code index_path3} and prints the stored "hotelName" of each of the top 100 hits.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the index cannot be opened, searched or closed
 */
public static void main(String[]args) throws Exception{
    // createPrefixIndex(); // uncomment to (re)build the index before searching
    // Prefix query
    Query trquery = new PrefixQuery(new Term("pinyin","ghai" ));
    IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(index_path3)));
    try {
        Searcher searcher = new IndexSearcher(indexReader);
        try {
            TopDocs topDocs = searcher.search(trquery,100);
            for (ScoreDoc hit : topDocs.scoreDocs){
                Document doc = searcher.doc(hit.doc);
                System.out.println("doc = "+doc.get("hotelName"));
            }
        } finally {
            searcher.close();
        }
    } finally {
        // The reader was opened here, so it is released here even when the search fails.
        indexReader.close();
    }
}
/**
 * Creates the pinyin index at {@code index_path3}, replacing any existing index there.
 *
 * <p>Four sample documents are written. For each one, every suffix of its pinyin string
 * is indexed as a separate un-analyzed "pinyin" term, so that a {@code PrefixQuery} can
 * match text starting at any position of the original string.
 *
 * <p>Failures are printed to standard error and not rethrown. The writer is closed in
 * all cases.
 */
public static void createPrefixIndex(){
    Analyzer analyzer = new IKAnalyzer();
    PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(analyzer);
    // Per-field override kept for reference (disabled):
    // perFieldAnalyzerWrapper.addAnalyzer("price", new WhitespaceAnalyzer());
    IndexWriter writer = null;
    try {
        writer = new IndexWriter(FSDirectory.open(new File(index_path3)), perFieldAnalyzerWrapper, true,IndexWriter.MaxFieldLength.LIMITED);
        addPinyinDocument(writer, "test2", "shanghai");
        addPinyinDocument(writer, "test3", "beijing");
        addPinyinDocument(writer, "test6", "zhongguo_benxi");
        addPinyinDocument(writer, "test1", "tianjin");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close even after a failure; otherwise the writer stays open for the rest of the run.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Adds one document holding an analyzed "hotelName" field and one un-analyzed "pinyin"
 * field per suffix of {@code pinyin} (the full string first, the last character last).
 * All fields are stored.
 *
 * @param writer    open index writer that receives the document
 * @param hotelName value of the "hotelName" field
 * @param pinyin    pinyin string whose suffixes are indexed
 * @throws Exception if the writer fails to add the document
 */
private static void addPinyinDocument(IndexWriter writer, String hotelName, String pinyin) throws Exception {
    Document doc = new Document();
    doc.add(new Field("hotelName", hotelName, Field.Store.YES, Field.Index.ANALYZED));
    int len = pinyin.length();
    // Store every suffix of the pinyin string as its own term.
    for (int i = 0; i < len; i++) {
        String value = pinyin.substring(i, len);
        doc.add(new Field("pinyin", value, Field.Store.YES, Field.Index.NOT_ANALYZED));
    }
    writer.addDocument(doc);
}