拼音检索的大致思路是这样的:
①将需要使用拼音检索的字段汇集到一个拼音分词字段里(我的拼音分词字段使用pinyin4j+NGram做的);
加入三个jar包:pinyin4j-2.5.0.jar、lucene-analyzers-smartcn-4.9.1.jar、pinyinAnalyzer.jar;将jar包拷入Java\apache-tomcat-7.0.55\webapps\solr\WEB-INF\lib下
schema.xml文件设置如下(注意:需要将拼音字段pinyin的stored设置为true):
<!-- Document key: required and single-valued. -->
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>

<!-- Descriptive fields; title and content use the text_ik field type. -->
<field name="title" type="text_ik" indexed="true" stored="true"/>
<field name="author" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="keywords" type="text_general" indexed="true" stored="true"/>
<field name="content" type="text_ik" indexed="true" stored="true"/>

<!-- Combined field "tc": receives copies of both title and content. -->
<field name="tc" type="text_ik" indexed="true" stored="true" multiValued="true"/>
<copyField source="title" dest="tc"/>
<copyField source="content" dest="tc"/>

<!-- Remaining per-document fields. -->
<field name="bookid" type="text_general" indexed="true" stored="true"/>
<field name="bookname" type="text_general" indexed="true" stored="true"/>
<field name="url" type="text_general" indexed="true" stored="true"/>
<field name="resourcetype" type="int" indexed="true" stored="true"/>
<field name="classifycode" type="text_general" indexed="true" stored="true"/>
<field name="price" type="float" indexed="true" stored="true"/>
<field name="updateTime" type="date" indexed="true" stored="true"/>

<!-- Pinyin field: stored and multi-valued, populated with a copy of title. -->
<field name="pinyin" type="text_pinyin" indexed="true" stored="true" multiValued="true"/>
<copyField source="title" dest="pinyin"/>
<!-- by michael: pinyin -->
<!--
  Field type "text_pinyin", referenced by fields declared with type="text_pinyin".
  The index-time and query-time analyzers are configured with the same chain:
  the SmartChinese sentence tokenizer and word filter, followed by the two
  custom filters from com.shentong.search.analyzers.
  NOTE(review): judging by their class names, the custom filters convert tokens
  to pinyin and then emit n-grams of them; confirm against the pinyinAnalyzer jar.
-->
<fieldType name="text_pinyin" class="solr.TextField" positionIncrementGap="0">
  <analyzer type="index">
    <tokenizer class="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"/>
    <filter class="org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"/>
    <!-- NOTE(review): "minTermLenght" is kept exactly as spelled in the original;
         presumably it matches the parameter name the factory reads. Verify before renaming. -->
    <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory"
            minTermLenght="2"/>
    <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory"
            minGram="6"
            maxGram="20"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"/>
    <filter class="org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"/>
    <!-- Same filter settings as the index analyzer above. -->
    <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory"
            minTermLenght="2"/>
    <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory"
            minGram="6"
            maxGram="20"/>
  </analyzer>
</fieldType>
|