1 在 Solr 的 webapp/WEB-INF 目录下创建一个 classes 文件夹:
IKAnalyzer.cfg.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<!-- IK Analyzer extension settings, written in the Java XML properties format. -->
<properties>
  <comment>IK Analyzer 扩展配置</comment>

  <!-- Name of the user-supplied extension dictionary file. -->
  <entry key="ext_dict">ext.dic</entry>

  <!-- Name of the user-supplied extension stop-word dictionary file. -->
  <entry key="ext_stopwords">my_ext_stopword.dic</entry>
</properties>
ext.dic:存放扩展词和同义词(既是 IK 的 ext_dict 扩展字典,也是 SynonymGraphFilterFactory 引用的同义词文件):
厉害了我的国
相同,相似,相近
电脑,笔记本电脑=>计算机
my_ext_stopword.dic:存放停用词:
的
地
了
你
我
他
它
不
嗯
需要引入的jar:
配置solrconfig.xml:
<!-- Incremental import: load the Data Import Handler JAR from the Solr dist directory. -->
<lib dir="${solr.install.dir:../../../..}/dist/"
     regex="solr-dataimporthandler-.*\.jar"/>

<!-- Expose the Data Import Handler at /dataimport. -->
<requestHandler name="/dataimport" class="solr.DataImportHandler">
  <lst name="defaults">
    <!-- File that holds the data source and entity definitions. -->
    <str name="config">db-data-config.xml</str>
  </lst>
</requestHandler>
db-data-config.xml增量导入配置文件:
<!--
  Data Import Handler configuration.
  Imports rows of db_video_copy as Solr documents and attaches play_num from
  db_user_video_statistics_copy to each video.
-->
<dataConfig>
  <!--
    JDBC connection to the MySQL database yjcq_prod.
    The ampersands separating URL parameters must be written as &amp; inside an
    XML attribute value; a bare ampersand makes this file not well-formed.
    NOTE(review): com.mysql.jdbc.Driver is the legacy driver class name; with
    MySQL Connector/J 8.x the class is com.mysql.cj.jdbc.Driver. Confirm which
    connector JAR is deployed before changing it.
  -->
  <dataSource
      driver="com.mysql.jdbc.Driver"
      url="jdbc:mysql://*****:3306/yjcq_prod?useUnicode=true&amp;characterEncoding=utf-8&amp;serverTimezone=GMT"
      user="**"
      password="**"/>
  <document>
    <!--
      Root entity, one Solr document per db_video_copy row.
        query            : selects every row (full import).
        deltaQuery       : ids of rows whose last_modify_time is later than the
                           last index time.
        deltaImportQuery : loads the full row for each id found by a delta query.
    -->
    <entity name="video" pk="id"
            query="select * from db_video_copy "
            deltaQuery="select id from db_video_copy where last_modify_time > '${dataimporter.last_index_time}'"
            deltaImportQuery="select * from db_video_copy where id = ${dih.delta.id}">
      <!--
        Statistics table joined to the parent video row.
          query            : play_num for the current parent video id.
          deltaQuery       : video_id of statistics rows modified since the last
                             index time.
          parentDeltaQuery : maps a changed statistics row back to the id of the
                             video document that has to be re-imported.
      -->
      <entity name="statistics" pk="video_id"
              query="select play_num from db_user_video_statistics_copy where video_id = '${video.id}'"
              deltaQuery="select video_id from db_user_video_statistics_copy where last_modify_time > '${dataimporter.last_index_time}'"
              parentDeltaQuery="select id from db_video_copy where id = ${statistics.video_id}">
        <field column="play_num" name="play_num"/>
      </entity>
    </entity>
  </document>
</dataConfig>
managed-schema配置文件添加字段与中文分词器 :
<!--
  Field type for Chinese text, tokenized with the IK tokenizer.

  Index analyzer: tokenize, lowercase, remove stop words, expand synonyms, then
  flatten the token graph. Solr requires FlattenGraphFilterFactory after
  SynonymGraphFilterFactory when the latter is used at index time, because the
  index cannot store a token graph.

  Query analyzer: tokenize, then apply the same lowercase and stop-word filters
  as at index time so query terms are normalised the same way as indexed terms.
  Synonyms are expanded at index time only.
-->
<fieldType name="my_zh_text" class="solr.TextField" indexed="true" stored="true">
  <analyzer type="index">
    <tokenizer class="com.dongnao.lucene.demo.analizer.ik.IkTokenizer4Lucene7Factory" useSmart="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" words="my_ext_stopword.dic"/>
    <filter class="solr.SynonymGraphFilterFactory" synonyms="ext.dic"/>
    <!-- Must directly follow the synonym graph filter on the index side. -->
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="com.dongnao.lucene.demo.analizer.ik.IkTokenizer4Lucene7Factory" useSmart="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" words="my_ext_stopword.dic"/>
  </analyzer>
</fieldType>
<!--
  Custom primitive field types.
  NOTE(review): none of these types enables docValues explicitly. Solr point
  fields generally need docValues for sorting and faceting; confirm whether any
  field using them is sorted or faceted on.
-->

<!-- String types -->
<fieldType name="my_str"             class="solr.StrField"/>
<fieldType name="my_strs"            class="solr.StrField" multiValued="true" positionIncrementGap="100"/>
<!-- Stored-only string: not indexed. -->
<fieldType name="my_str_only_stored" class="solr.StrField" indexed="false"/>

<!-- Boolean types -->
<fieldType name="my_bool"  class="solr.BoolField"/>
<fieldType name="my_bools" class="solr.BoolField" multiValued="true"/>

<!-- Single-valued numeric and date point types -->
<fieldType name="my_int"    class="solr.IntPointField"/>
<fieldType name="my_long"   class="solr.LongPointField"/>
<fieldType name="my_float"  class="solr.FloatPointField"/>
<fieldType name="my_double" class="solr.DoublePointField"/>
<fieldType name="my_date"   class="solr.DatePointField"/>

<!-- Multi-valued numeric and date point types -->
<fieldType name="my_ints"    class="solr.IntPointField"    multiValued="true"/>
<fieldType name="my_longs"   class="solr.LongPointField"   multiValued="true"/>
<fieldType name="my_floats"  class="solr.FloatPointField"  multiValued="true"/>
<fieldType name="my_doubles" class="solr.DoublePointField" multiValued="true"/>
<fieldType name="my_dates"   class="solr.DatePointField"   multiValued="true"/>
<!--
  Fields for the video table.
  NOTE(review): the names presumably mirror the database column names (for
  example spreak_man), so they are left unchanged here; confirm against the
  table definition before renaming any of them.
-->

<!-- Titles and timestamps -->
<field name="video_name"   type="my_zh_text" indexed="true" stored="true"/>
<field name="title_second" type="my_zh_text"/>
<field name="publish_time" type="my_str"/>
<field name="create_time"  type="my_date"/>

<!-- Module and category hierarchy -->
<field name="module_id"           type="my_long"/>
<field name="module_name"         type="my_zh_text"/>
<field name="category_one_id"     type="my_long"/>
<field name="category_one_name"   type="my_zh_text"/>
<field name="category_two_id"     type="my_long"/>
<field name="category_two_name"   type="my_zh_text"/>
<field name="category_three_id"   type="my_long"/>
<field name="category_three_name" type="my_zh_text"/>

<field name="hot"      type="my_int"/>
<field name="remark"   type="my_zh_text"/>
<field name="pay_type" type="my_int"/>

<!-- Media URLs and images -->
<field name="free_audio_url"   type="my_str"/>
<field name="charge_audio_url" type="my_str"/>
<field name="free_video_url"   type="my_str"/>
<field name="charge_video_url" type="my_str"/>
<field name="video_type"       type="my_int"/>
<field name="audio_img"        type="my_str"/>
<field name="video_img"        type="my_str"/>
<field name="image_url"        type="my_str"/>

<field name="charge"          type="my_str"/>
<field name="sort"            type="my_long"/>
<field name="video_time"      type="my_long"/>
<field name="spreak_man"      type="my_zh_text"/>
<field name="putaway"         type="my_int"/>
<field name="graphic_img_url" type="my_str"/>
<field name="wx_img_url"      type="my_str"/>
<field name="label"           type="my_zh_text"/>
<field name="search"          type="my_zh_text"/>
<field name="titles_time"     type="my_long"/>

<!-- Play count. -->
<field name="play_num" type="my_int"/>
结果: