mac环境
从官网下载solr6.6.0
solr-6.6.0/bin下执行 ./solr start
http://localhost:8983/查看默认example的demo
下面创建我们自己的core
在solr-6.6.0/server/solr/下新建文件夹search_node,将solr-6.6.0/example/example-DIH/solr/solr下的所有文件和文件夹全部拷入search_node文件夹中
修改配置文件core.properties
# Core descriptor for the search_node core.
name=search_node
config=solrconfig.xml
# Must name the schema file (conf/managed-schema); solr-data-config.xml is the
# data import configuration (it holds <dataConfig>), not a schema.
schema=managed-schema
dataDir=data
修改conf/managed-schema
1>加入中文分词类型text_ansj,需要导入的jar包在下面会给出来
<!-- Text type for Chinese content, tokenized by the Ansj tokenizer factory. -->
<fieldType name="text_ansj" class="solr.TextField" multiValued="true" positionIncrementGap="100">
  <!-- Index time: tokenize, drop stop words, lower-case. -->
  <analyzer type="index">
    <tokenizer class="com.monetware.ansj4solr.AnsjTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <!-- Query time: same chain, with synonym expansion right after tokenizing. -->
  <analyzer type="query">
    <tokenizer class="com.monetware.ansj4solr.AnsjTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
2>添加需要从数据库中导入的字段
<!-- Fields filled from the database import; all are single-valued, indexed and stored. -->
<field name="id"           type="string"    indexed="true" stored="true" required="true" multiValued="false"/>
<field name="title"        type="text_ansj" indexed="true" stored="true" multiValued="false"/>
<field name="author"       type="text_ansj" indexed="true" stored="true" multiValued="false"/>
<field name="tags"         type="text_ansj" indexed="true" stored="true" multiValued="false"/>
<field name="publish_time" type="tdates"    indexed="true" stored="true" multiValued="false"/>
<field name="source"       type="text_ansj" indexed="true" stored="true" multiValued="false"/>
<field name="content"      type="text_ansj" indexed="true" stored="true" multiValued="false"/>
修改conf/solr-data-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Data import configuration: loads rows of the MySQL table nw_news
  (status = 3) into the index.
-->
<dataConfig>
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://127.0.0.1:3306/socialwork"
              user="root"
              password="monetware"/>
  <document>
    <!--
      query            : full import. publish_time is shifted by +8 hours and the
                         shifted value is aliased as publish_time so that it matches
                         the column="publish_time" mapping below.
      deltaQuery       : ids of status = 3 rows updated after the last index time.
      deletedPkQuery   : ids of rows whose status is not 3.
      deltaImportQuery : loads a single changed row by id.
      NOTE(review): deltaImportQuery uses "select *" and does not apply the +8 hour
      shift, so publish_time differs between full and delta imports; confirm intent.
      NOTE(review): column "abstract" is mapped below but is not selected by the
      full import query; confirm whether it should be.
    -->
    <entity name="nw_news" pk="id"
            query="select id as id ,title as title,tags as tags,author as author,source as source,content as content,DATE_ADD(publish_time,INTERVAL 8 hour) as publish_time from nw_news where status=3 order by publish_time desc"
            deltaQuery="select id from nw_news where status=3 and last_update_time>'${dataimporter.last_index_time}'"
            deletedPkQuery="select id from nw_news where status!='3'"
            deltaImportQuery="select * from nw_news where status=3 and id='${dih.delta.id}'">
      <field column="id" name="id"/>
      <field column="title" name="title"/>
      <field column="tags" name="tags"/>
      <field column="author" name="author"/>
      <field column="source" name="source"/>
      <field column="abstract" name="abstract"/>
      <field column="content" name="content"/>
      <field column="publish_time" name="publish_time"/>
    </entity>
  </document>
</dataConfig>
添加实时更新索引配置
添加jar
dataimportscheduler-1.2.jar(修改过的,下载地址 http://download.csdn.net/download/kanshimekan/9704904)
solr-dataimporthandler-6.6.0.jar
solr-dataimporthandler-extras-6.6.0.jar
修改solr-6.6.0/server/solr-webapp/webapp/WEB-INF/web.xml
<!-- Registers the data import scheduler's application listener. -->
<listener>
  <listener-class>org.apache.solr.handler.dataimport.scheduler.ApplicationListener</listener-class>
</listener>
添加更新配置文件
solr-6.6.0/server/solr下新建conf文件夹
在solr-6.6.0/server/solr/conf下新建dataimport.properties文件
配置:
#################################################
#                                               #
#       dataimport scheduler properties         #
#                                               #
#################################################

# To sync or not to sync: 1 = active, anything else = inactive.
syncEnabled=1

# Which cores to schedule. In a multi-core environment you can decide which
# cores you want synchronized; leave empty or comment it out if using a
# single-core deployment.
#syncCores=game,resource
syncCores=search_node

# Solr server name or IP address [defaults to localhost if empty].
server=localhost

# Solr server port [defaults to 80 if empty].
port=9090

# Application name/context
# [defaults to current ServletContextListener's context (app) name].
webapp=solr

# URL params [mandatory]: remainder of the URL.
params=/dataimport?command=delta-import&clean=false&commit=true

# Schedule interval: number of minutes between two runs [defaults to 30 if empty].
interval=2

# Interval between index rebuilds, in minutes; default 7200, i.e. 5 days.
# Empty, 0, or commented out: never rebuild the index.
reBuildIndexInterval=7200

# Parameters used for the index rebuild.
reBuildIndexParams=/dataimport?command=full-import&clean=true&commit=true

# Start time from which the rebuild interval is counted; the first actual run
# happens at reBuildIndexBeginTime + reBuildIndexInterval*60*1000.
# Two formats: "2012-04-11 03:10:00" or "03:10:00"; for the latter the date part
# is filled in with the date on which the service starts.
reBuildIndexBeginTime=03:10:00
如果不配置reBuildIndexBeginTime,会导致报错:Unable to convert 'interval' to number 00:00:00
这个是源码中的bug:reBuildIndexBeginTime为空时,默认值"00:00:00"被赋给了interval,而不是reBuildIndexBeginTime:
// Quoted from the scheduler source: when reBuildIndexBeginTime is null or empty,
// the "00:00:00" default is stored in interval instead of reBuildIndexBeginTime.
if ((this.reBuildIndexBeginTime == null) || (this.reBuildIndexBeginTime.isEmpty()))
this.interval = "00:00:00";
链接: https://pan.baidu.com/s/1bFZu7G 密码: pqjr
启动 solr-6.6.0/bin/solr start -port 9090
重启 solr-6.6.0/bin/solr restart -port 9090