Lucene 多种搜索

Lucene 的搜索功能相当强大:它提供了很多辅助查询类,各自完成一种特殊的查询,也可以相互组合使用,以完成一些复杂的查询操作。

 

 

Java代码   收藏代码
  1. public class Test{  
  2.       
  3.     Analyzer analyzer = new StandardAnalyzer();  
  4.       
  5.     RAMDirectory directory = new RAMDirectory();  
  6.       
  7.     /**  
  8.      * 创建索引 
  9.      *  
  10.      * @throws IOException 
  11.      */  
  12.     public void index() throws IOException{  
  13.           
  14.         IndexWriter indexWriter = new IndexWriter(directory,analyzer,true);  
  15.           
  16.         Document doc1 = new Document();  
  17.           
  18.         doc1.add(new Field("title","aaabbb",Store.YES,Index.TOKENIZED));  
  19.           
  20.         doc1.add(new Field("content","If you would like to help promote OpenOffice",Store.YES,Index.TOKENIZED));  
  21.           
  22.         doc1.add(new Field("time","2005",Store.YES,Index.TOKENIZED));  
  23.           
  24.         indexWriter.addDocument(doc1);  
  25.           
  26.         Document doc2 = new Document();  
  27.           
  28.         doc2.add(new Field("title","bbcc",Store.YES,Index.TOKENIZED));  
  29.           
  30.         doc2.add(new Field("content","sdfsdfsdfasdds",Store.YES,Index.TOKENIZED));  
  31.           
  32.         doc2.add(new Field("time","2007",Store.YES,Index.TOKENIZED));  
  33.           
  34.         indexWriter.addDocument(doc2);  
  35.           
  36.         indexWriter.optimize();  
  37.           
  38.         indexWriter.close();  
  39.     }  
  40.       
  41.     // 按词条搜索  
  42.     public void termSearcher() throws IOException{  
  43.           
  44.         IndexSearcher searcher = new IndexSearcher(directory);  
  45.           
  46.         // 查询title中包含aaa  
  47.         Term term = new Term("title","aaa");  
  48.           
  49.         Query query = new TermQuery(term);  
  50.           
  51.         searcher.search(query);  
  52.       
  53.         searcher.close();  
  54.     }  
  55.   
  56.     // 短语搜索   
  57.     public void phraseSearcher() throws IOException{  
  58.           
  59.         IndexSearcher searcher = new IndexSearcher(directory);  
  60.           
  61.         PhraseQuery phraseQuery = new PhraseQuery();  
  62.           
  63.         // slop 两个项的位置之间允许的最大间隔,这里would和help中间只隔了like(to会被去掉),所以最大间隔设为1就能找到值  
  64.         // 对于两个相连的关键词来说,无论坡度设为多少,都可以找到  
  65.         // 对于不相连的词来说,当两个关键词相隔数小于坡度都可以找到,否则找不到  
  66.         phraseQuery.setSlop(1);  
  67.           
  68.         phraseQuery.add(new Term("content","would"));  
  69.           
  70.         phraseQuery.add(new Term("content","help"));  
  71.           
  72.         // 如果将help放前面,would放后面,需要将would向后移动3个位置才能到help后面(to不算),所以要设为slop最少要设为3  
  73.         // phraseQuery.add(new Term("content","help"));  
  74.           
  75.         // phraseQuery.add(new Term("content","would"));  
  76.           
  77.         // phraseQuery.setSlop(3);  
  78.           
  79.         // 短语的评分是根据项之间距离越小,评分越高,否则越小  
  80.         Hits hits =  searcher.search(phraseQuery);  
  81.           
  82.         printResult(hits);  
  83.           
  84.         searcher.close();  
  85.     }  
  86.       
  87.     // 通配符搜索 WildcardQuery     
  88.     // 通配符包括’?’匹配一个任意字符和’*’匹配零个或多个任意字符,例如你搜索’use*’,你可能找到’user’或者’uses’:     
  89.     public void wildcardSearcher() throws IOException{  
  90.           
  91.             IndexSearcher searcher = new IndexSearcher(directory);  
  92.               
  93.             // 与正则一样,*代表0个或多个字母,?代表0个或一个字母  
  94.             // WildcardQuery与QueryParser不同的是:WildcardQuery的前缀可以为*,而QueryParser不行  
  95.             WildcardQuery query = new WildcardQuery(new Term("content","a?bbb*"));  
  96.               
  97.             Hits hits = searcher.search(query);  
  98.               
  99.             printResult(hits);  
  100.               
  101.             searcher.close();  
  102.     }  
  103.       
  104.     // 模糊搜索 FuzzyQuery  
  105.     public void fuzzySearcher() throws IOException{  
  106.               
  107.             IndexSearcher search = new IndexSearcher(directory);  
  108.               
  109.             // OpenOffica虽然没被索引,但能找到相近的OpenOffice  
  110.             FuzzyQuery query = new FuzzyQuery(new Term("content","OpenOffica"));  
  111.               
  112.             Hits hits = search.search(query);  
  113.               
  114.             printResult(hits);  
  115.               
  116.             search.close();  
  117.     }  
  118.       
  119.     // 使用前缀PrefixQuery  
  120.     public void prefixSearcher() throws IOException{  
  121.           
  122.             IndexSearcher search = new IndexSearcher(directory);  
  123.               
  124.             // 全部title前缀为a  
  125.             PrefixQuery query = new PrefixQuery(new Term("title","b"));  
  126.               
  127.             Hits hits = search.search(query);  
  128.               
  129.             printResult(hits);  
  130.               
  131.             search.close();  
  132.               
  133.     }  
  134.   
  135.     // 范围搜索 RangeQuery  
  136.     public void rangeSearcher() throws IOException{  
  137.           
  138.         IndexSearcher search = new IndexSearcher(directory);  
  139.           
  140.         // RangeQuery query = new RangeQuery(beginTime, endTime, false);   
  141.         // 开始时间,结束时间,最后一个参数表示是否包含边界条件本身,如果为false  
  142.         RangeQuery query = new RangeQuery(new Term("time","2005"),new Term("time","2007"),true);  
  143.           
  144.         Hits hits = search.search(query);  
  145.           
  146.         printResult(hits);  
  147.           
  148.         search.close();  
  149.     }  
  150.       
  151.       
  152.     // 与或搜索BooleanQuery  
  153.     //BooleanClause用于表示布尔查询子句关系的类,包括:BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.SHOULD。有以下6种组合:  
  154.   
  155.     //1.MUST和MUST:取得连个查询子句的交集。  
  156.   
  157.     //2.MUST和MUST_NOT:表示查询结果中不能包含MUST_NOT所对应得查询子句的检索结果。  
  158.   
  159.     //3.MUST_NOT和MUST_NOT:无意义,检索无结果。  
  160.   
  161.     //4.SHOULD与MUST、SHOULD与MUST_NOT:SHOULD与MUST连用时,无意义,结果为MUST子句的检索结果。与MUST_NOT连用时,功能同MUST。  
  162.   
  163.     //5.SHOULD与SHOULD:表示“或”关系,最终检索结果为所有检索子句的并集。  
  164.   
  165.     public void booleanSearcher() throws IOException, ParseException{  
  166.           
  167.         IndexSearcher search = new IndexSearcher(directory);  
  168.           
  169.         QueryParser qp1 = new QueryParser("title",new StandardAnalyzer());  
  170.               
  171.         Query query1 = qp1.parse("aa*");  
  172.               
  173.         QueryParser qp2 = new QueryParser("title",new StandardAnalyzer());  
  174.               
  175.         Query query2 = qp2.parse("bb*");  
  176.               
  177.         BooleanQuery query = new BooleanQuery();  
  178.               
  179.         // 搜索结果的title的前双缀可以是aa,或bb  
  180.         query.add(query1, BooleanClause.Occur.SHOULD);  
  181.               
  182.         // BooleanClause.Occur.MUST 必须  
  183.         // BooleanClause.Occur.MUST_NOT 必须不是   
  184.         query.add(query2, BooleanClause.Occur.SHOULD);  
  185.           
  186.         Hits hits = search.search(query);  
  187.               
  188.         printResult(hits);  
  189.           
  190.         search.close();  
  191.           
  192.     }  
  193.       
  194.   
  195.     // 多关键的搜索 PhrasePrefixQuery  
  196.     public void phrasePrefixSearcher() throws IOException{  
  197.           
  198.         IndexSearcher search = new IndexSearcher(directory);  
  199.           
  200.         PhrasePrefixQuery query = new PhrasePrefixQuery();  
  201.           
  202.         // 这里两项都有可能首先被匹配  
  203.         query.add(new Term[]{new Term("content","would"),new Term("content","can")});  
  204.           
  205.         // 只有一项必须匹配  
  206.         query.add(new Term("content","help"));  
  207.           
  208.         // If you would like to help promote OpenOffice  
  209.         // can I help you  
  210.         // slop因子的作用域为查询中的所有短语  
  211.         query.setSlop(1);  
  212.           
  213.         // 匹配第一项为 would 或 can 第二项为help  
  214.         // solp设置为1   
  215.         // If you would like to help promote OpenOffice  除去if to 外,would与help的距离=1  
  216.         // can I help you 的距离也=1  所以可以搜索出两条数据  
  217.           
  218.         Hits hits = search.search(query);  
  219.           
  220.         printResult(hits);  
  221.           
  222.         search.close();  
  223.     }  
  224.       
  225.     // 在多个域上查询 MultiFieldQueryParser  
  226.     public void multiFieldSearcher() throws IOException, ParseException{  
  227.           
  228.         IndexSearcher search = new IndexSearcher(directory);  
  229.           
  230.         // 默认情况下的方式为Occur.SHOULD  
  231.         // titile可以匹配bb,content可以匹配you  
  232. //      MultiFieldQueryParser.parse(new String[]{"bb","you"},new String[]{"title","content"}, analyzer);  
  233.           
  234.         // titile必须匹配bb,content不能匹配  
  235.         Query query = MultiFieldQueryParser.parse( new String[]{"bb","you"},new String[]{"title","content"},new BooleanClause.Occur[]{Occur.MUST,Occur.MUST_NOT}, analyzer);  
  236.           
  237.         // title中必须包含bb  content不能有bb  
  238.         // Query query = MultiFieldQueryParser.parse( "bb*",new String[]{"title","content"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT}, analyzer);  
  239.           
  240.         Hits hits = search.search(query);  
  241.           
  242.         printResult(hits);  
  243.           
  244.         search.close();  
  245.     }  
  246.       
  247.     public void printResult(Hits hits) throws IOException{  
  248.         for(int i = 0; i < hits.length(); i++){  
  249.             Document d = hits.doc(i);  
  250.             System.out.println(d.get("title"));  
  251.             System.out.println(d.get("content"));  
  252.             System.out.println(d.get("time"));  
  253.         }  
  254.     }  
  255.       
  256.       
  257. }  
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值