solr4.7.2开发实践 ——拼写检查spellcheck【需要修改为自己配置的】

最新推荐文章于 2024-05-19 10:50:47 发布

buster2014

最新推荐文章于 2024-05-19 10:50:47 发布

阅读量1.5k

点赞数

分类专栏： solr4.7.2搜索应用服务器

solr4.7.2搜索应用服务器专栏收录该内容

16 篇文章 0 订阅

订阅专栏

文章来源：http://my.oschina.net/MrMichael/blog/261242

①拼写检查不同于其他域，它在建立索引时需要分词，但是检索时不需要分词，所以要建立一个特殊的域，以供拼写检查：

在schema.xml文件里定义拼写检查专用的域（spell）及其字段类型，并通过copyField指定哪些字段的内容会被复制到该域：

 
          
        <!-- Dedicated spell-check field: indexed but not stored; multiValued because
             more than one source field is copied into it via copyField. -->
        <field name="spell" type="text_spell" indexed="true" stored="false" multiValued="true"/>
       

           
       
 
        <!-- Feed the "name" field into the spell-check field. -->
        <copyField source="name" dest="spell"/>
       
 
          
        <!-- Feed the "content" field into the spell-check field. -->
        <copyField source="content" dest="spell"/>
       

           
       
 
        <!-- Field type used by the spell-check field.
             Index time: IK tokenizer (useSmart="false") followed by stop-word removal.
             Query time: whitespace splitting only, so the user's input is not
             segmented by IK before it is spell-checked. -->
        <fieldType name="text_spell" class="solr.TextField" positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false" conf="ik.conf"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
            </analyzer>
        </fieldType>
       

②在solrconfig.xml文件里设置：

 
          
        <!-- Spell-check search component, registered under the name "spellerror".
             It declares two dictionaries: "default" (built from the "spell" field of
             the main index) and "file" (built from spellings.txt). -->
        <searchComponent name="spellerror" class="solr.SpellCheckComponent">
            <!-- Field type whose query analyzer is applied to the spell-check input. -->
            <str name="queryAnalyzerFieldType">text_spell</str>

            <!-- a spellchecker built from a field of the main index -->
            <lst name="spellchecker">
                <str name="name">default</str>
                <str name="field">spell</str>
                <str name="classname">solr.DirectSolrSpellChecker</str>
                <!-- Distance measure; "internal" selects the built-in measure. -->
                <str name="distanceMeasure">internal</str>
                <!-- Minimum accuracy for a term to be accepted as a suggestion. -->
                <float name="accuracy">0.5</float>
                <!-- Maximum number of edits considered when enumerating terms. -->
                <int name="maxEdits">2</int>
                <!-- Minimum shared prefix when enumerating terms. -->
                <int name="minPrefix">1</int>
                <!-- Maximum number of inspections per result. -->
                <int name="maxInspections">5</int>
                <!-- Minimum length of a query term to be considered for correction. -->
                <int name="minQueryLength">2</int>
                <!-- Maximum document-frequency threshold for a query term to still be
                     considered for correction. -->
                <float name="maxQueryFrequency">0.01</float>
            </lst>

            <!-- a spellchecker that reads its dictionary from a text file -->
            <lst name="spellchecker">
                <str name="classname">solr.FileBasedSpellChecker</str>
                <str name="name">file</str>
                <str name="sourceLocation">spellings.txt</str>
                <str name="characterEncoding">UTF-8</str>
                <str name="spellcheckIndexDir">spellcheckerFile</str>
            </lst>
        </searchComponent>
       
 
           
        <!-- Request handler exposed at /spell. It searches the "spell" field by default,
             turns spell checking on with the "default" dictionary, and runs the
             "spellerror" search component after the standard components. -->
        <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
            <lst name="defaults">
                <str name="df">spell</str>
                <str name="spellcheck.dictionary">default</str>
                <str name="spellcheck">on</str>
                <!-- Optional parameters, disabled:
                <str name="spellcheck.extendedResults">true</str>
                <str name="spellcheck.count">10</str>
                <str name="spellcheck.alternativeTermCount">5</str>
                <str name="spellcheck.maxResultsForSuggest">5</str>
                -->
                <str name="spellcheck.collate">true</str>
                <str name="spellcheck.collateExtendedResults">true</str>
                <!-- Optional parameters, disabled:
                <str name="spellcheck.maxCollationTries">10</str>
                <str name="spellcheck.maxCollations">5</str>
                -->
            </lst>
            <arr name="last-components">
                <str>spellerror</str>
            </arr>
        </requestHandler>
       

③solrj里的代码

 
        /** 
       
        * @method: testSpellCheck 
       
        * @Description: 拼写检查  
       
        *  
       
        * @author: ChenYW 
       
        * @date 2014-4-15 下午06:14:56 
       
        */ 
       
        public  
        String spellCheck(String word){ 
       
        SolrQuery query =  
        new  
        SolrQuery();   
       
        query.set( 
        "defType" 
        , 
        "edismax" 
        ); 
        //加权 
       
        query.set( 
        "qf" 
        , 
        "name^20.0" 
        ); 
       
        query.set( 
        "spellcheck" 
        ,  
        "true" 
        );   
       
        query.set( 
        "spellcheck.q" 
        , word); 
       
        query.set( 
        "qt" 
        ,  
        "/spell" 
        );   
       
        query.set( 
        "spellcheck.build" 
        ,  
        "true" 
        ); 
        //遇到新的检查词，会自动添加到索引里面   
       
        query.set( 
        "spellcheck.count" 
        ,  
        5 
        ); 
       
        try  
        {   
       
        QueryResponse rsp = server.query(query);   
       
        SpellCheckResponse re=rsp.getSpellCheckResponse();   
       
        if  
        (re !=  
        null 
        ) {   
       
        if 
        (!re.isCorrectlySpelled()){ 
       
        String t = re.getFirstSuggestion(word); 
        //获取第一个推荐词   
       
        System.out.println( 
        "推荐词："  
        + t); 
       
        return  
        t; 
       
        }                   
       
        }  
       
        }  
        catch  
        (SolrServerException e) {   
       
        e.printStackTrace();   
       
        }   
       
        return  
        null 
        ; 
       
        }