定时增量导入
solr是7.7.3版本
solr启动命令:bin目录下cmd运行solr start
solr停止命令:bin目录下 solr stop -p 8983
上图是core位置
上图中data-config.xml是增量和数据同步配置
<dataConfig>
  <!-- The dataSource element holds the database connection settings.
       NOTE(review): the password is stored in plain text here; restrict access to this file. -->
  <dataSource name="ds_1"
              type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/db_wechat_monitor?characterEncoding=UTF-8"
              batchSize="-1"
              user="root"
              password="123"/>
  <document>
    <!-- The pk attribute names the primary key; it must match the uniqueKey declared in
         managed-schema. The attribute name is case-sensitive, so it is written in lower case.

         query            : SQL run by full-import.
         deltaQuery       : SQL run by delta-import to find the ids of rows in dbo_wechat_record
                            whose updateTime is later than the last index time. Only that table is
                            consulted, so changes made solely to the joined file/contact tables do
                            not trigger a re-index.
         deltaImportQuery : SQL run once per id returned by deltaQuery to load the full row. -->
    <entity name="dbo_wechat_record"
            dataSource="ds_1"
            pk="id"
            query="select
                   dwr.id AS id, record_code, client_id, dwr.wxid AS wxid, dwr.record_ctime AS record_ctime, dwr.record_stime AS record_stime, record_type, record_belong,
                   msg_source, mobile_sync, wxid_from, wxid_to, wxid_group, msgcode, extend_contect,
                   sscode, file_name, dwr.file_hash, file_size,
                   dwf.path path, dwf1.path patht, dwc.NickName Tname, dwc.HeadPath Theader, dwc.Remark Tremark, dwd.NickName Fname, dwd.HeadPath Fheader, dwd.Remark Fremark, dwg.NickName Gname, dwg.HeadPath Gheader, dwg.Remark Gremark,
                   wxid_special, content, msg_source_content, dwr.updateTime AS updateTime
                   from dbo_wechat_record dwr
                   LEFT JOIN dbo_wechat_file dwf ON dwf.file_hash = dwr.file_hash
                   LEFT JOIN dbo_wechat_file dwf1 ON dwf1.file_hash = dwr.extend_contect
                   LEFT JOIN dbo_wechat_contact dwc ON dwc.UserName = dwr.wxid_to
                   LEFT JOIN dbo_wechat_contact dwd ON dwd.UserName = dwr.wxid_from
                   LEFT JOIN dbo_wechat_contact dwg ON dwg.UserName = dwr.wxid_group"
            deltaImportQuery="select
                   dwr.id AS id, record_code, client_id, dwr.wxid AS wxid, dwr.record_ctime AS record_ctime, dwr.record_stime AS record_stime, record_type, record_belong,
                   msg_source, mobile_sync, wxid_from, wxid_to, wxid_group, msgcode, extend_contect,
                   sscode, file_name, dwr.file_hash, file_size,
                   dwf.path path, dwf1.path patht, dwc.NickName Tname, dwc.HeadPath Theader, dwc.Remark Tremark, dwd.NickName Fname, dwd.HeadPath Fheader, dwd.Remark Fremark, dwg.NickName Gname, dwg.HeadPath Gheader, dwg.Remark Gremark,
                   wxid_special, content, msg_source_content, dwr.updateTime AS updateTime
                   from dbo_wechat_record dwr
                   LEFT JOIN dbo_wechat_file dwf ON dwf.file_hash = dwr.file_hash
                   LEFT JOIN dbo_wechat_file dwf1 ON dwf1.file_hash = dwr.extend_contect
                   LEFT JOIN dbo_wechat_contact dwc ON dwc.UserName = dwr.wxid_to
                   LEFT JOIN dbo_wechat_contact dwd ON dwd.UserName = dwr.wxid_from
                   LEFT JOIN dbo_wechat_contact dwg ON dwg.UserName = dwr.wxid_group
                   where dwr.id='${dih.delta.id}'"
            deltaQuery="select dwr.id AS id
                   from dbo_wechat_record dwr
                   where dwr.updateTime &gt; '${dataimporter.last_index_time}'">
      <!-- For each field, column is the column name (or alias) returned by the SQL above and
           name is the field name configured on the Solr side. Only the columns that should be
           indexed need to be listed: if the table has 10 columns and only 3 need indexing,
           list just those 3. The Solr document key is named id by default; if the database
           primary key has another name (for example objectId), alias it to id in the queries. -->
      <field column="id" name="id" />
      <field column="record_code" name="recordCode" />
      <field column="content" name="content" />
      <field column="msg_source_content" name="msgSourceContent" />
      <field column="wxid_from" name="wxidFrom" />
      <field column="wxid_to" name="wxidTo" />
      <field column="wxid_special" name="wxidSpecial" />
      <field column="record_ctime" name="recordCtime" />
      <field column="wxid_group" name="wxidGroup" />
      <field column="msg_source" name="msgSource" />
      <field column="wxid" name="wxid" />
      <field column="client_id" name="clientId" />
      <field column="record_stime" name="recordStime" />
      <field column="record_type" name="recordType" />
      <field column="record_belong" name="recordBelong" />
      <field column="msgcode" name="msgcode" />
      <field column="mobile_sync" name="mobileSync" />
      <field column="extend_contect" name="extendContect" />
      <field column="sscode" name="sscode" />
      <field column="file_name" name="fileName" />
      <field column="file_hash" name="fileHash" />
      <field column="file_size" name="fileSize" />
      <field column="path" name="path" />
      <field column="patht" name="patht" />
      <field column="Tname" name="Tname" />
      <field column="Theader" name="Theader" />
      <field column="Fname" name="Fname" />
      <field column="Fheader" name="Fheader" />
      <field column="Gname" name="Gname" />
      <field column="Gheader" name="Gheader" />
      <field column="Tremark" name="Tremark" />
      <field column="Fremark" name="Fremark" />
      <field column="Gremark" name="Gremark" />
      <field column="updateTime" name="updateTime" />
    </entity>
  </document>
</dataConfig>
上图是data-config中配置,上面部分是数据库配置和定时增量同步会执行的sql语句,需要同步的表需要添加updateTime字段,如下图
data-config下面部分的field中column是数据库中字段名(或查询中的别名),name是索引字段名,对应managed-schema配置文件;在name中出现的索引字段名都要加到managed-schema配置文件中,如下图
配置完这个就可以手动从数据库导入数据了
配置定时增量同步
1.solr-webapp/webapp/WEB-INF/lib中需要的jar包
2.在core同目录下添加conf文件夹,创建dataimport.properties
#Fri Jun 05 07:13:40 UTC 2020
# Settings read by the data-import scheduler listener.
# NOTE(review): the meanings below follow the notes that accompany this file and the
# property names themselves; confirm against the scheduler jar actually deployed.

# Interval between scheduled delta-imports (minutes, per the accompanying notes).
interval=1
# Host, port and webapp context of the Solr instance the scheduler calls.
port=8983
server=localhost
# Request sent for each scheduled delta-import.
params=/dataimport?command\=delta-import&clean\=false&commit\=true
webapp=solr
dbo_wechat_record.last_index_time=2020-06-05 07\:13\:39
# Interval between scheduled full rebuilds.
# NOTE(review): presumably minutes; 1400 is just under one day (1440) - confirm intent.
reBuildIndexInterval=1400
# 1 enables scheduled synchronisation.
syncEnabled=1
last_index_time=2020-06-05 07\:13\:39
# Time of day at which the first full rebuild starts.
reBuildIndexBeginTime=03\:10\:00
# Request sent for each scheduled full rebuild. It calls the /dataimport handler directly,
# the same way as "params" above, instead of routing through /select with qt=/dataimport.
reBuildIndexParams=/dataimport?command\=full-import&clean\=true&commit\=true
# Name(s) of the core(s) to synchronise; change this to match your own core.
syncCores=solr_wechat
interval是定时增量同步的时间间隔,单位是分钟(此处为1分钟);syncEnabled=1表示启用定时同步。需要修改的地方:syncCores,这是需要同步的core名字
3.在\solr-7.7.3\server\solr-webapp\webapp\WEB-INF\web.xml第一个servlet标签上添加
<!-- Registers the data-import scheduler's ApplicationListener with the Solr web application.
     NOTE(review): the class is expected to come from the scheduler jar placed in WEB-INF/lib. -->
<listener>
  <listener-class>org.apache.solr.handler.dataimport.scheduler.ApplicationListener</listener-class>
</listener>
全量同步更新和定时删除solr中过期数据
增量同步更新是根据数据库中的updateTime字段来判断是否需要更新的,solr中没有数据时增量同步不会触发全量更新,所以我在springboot项目中增加了随项目启动执行的全量更新任务run();另外增加了定时删除过期数据的任务printCurrentTime(),每小时执行一次,数据保留天数取自数据库中的SystemRecordDate字段
@Component
@EnableScheduling
@Service
@Transactional
public class SolrJ implements CommandLineRunner {

    // NOTE(review): wechatRecordService and server are used below but are not declared in
    // this excerpt; they must be declared (and injected/initialised) for the class to compile.

    /** Full-import endpoint of the solr_wechat core; clean=false keeps existing documents. */
    private static final String FULL_IMPORT_URL =
            "http://localhost:8983/solr/solr_wechat/dataimport?command=full-import&clean=false&commit=true";

    /**
     * Triggers a DataImportHandler full-import of all data into Solr once at application
     * start-up.
     *
     * @param args command-line arguments passed to the application (unused)
     * @throws Exception if the URL is malformed or the HTTP request fails
     */
    @Override
    public void run(String... args) throws Exception {
        System.out.println("全量导入任务开始执行");
        RestTemplate rest = new RestTemplate();
        // Read the response body as UTF-8 text.
        rest.getMessageConverters().add(0, new StringHttpMessageConverter(Charset.forName("UTF-8")));
        rest.getForObject(new URI(FULL_IMPORT_URL), String.class);
    }

    /**
     * Scheduled task that deletes expired documents from Solr. The next run starts one hour
     * after the previous run has finished.
     *
     * <p>Every document whose recordCtime is at or before "now minus N days" is removed, where
     * N is the value returned by {@code wechatRecordService.querySystemRecordDate()}. The range
     * is open-ended at the lower end so that arbitrarily old documents are also removed.
     *
     * @throws IOException         if communication with Solr fails
     * @throws SolrServerException if Solr reports an error
     */
    @Scheduled(fixedDelay = 3600000)
    public void printCurrentTime() throws IOException, SolrServerException {
        int deleteTime = wechatRecordService.querySystemRecordDate();

        // Cut-off instant: the current time minus the retention period in days.
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DATE, -deleteTime);

        // Solr date syntax: yyyy-MM-ddTHH:mm:ssZ.
        // NOTE(review): the value is formatted in the JVM default time zone but labelled "Z"
        // (UTC); confirm this matches how recordCtime values are stored in the index.
        SimpleDateFormat solrDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        String cutoff = solrDateFormat.format(cal.getTime());

        String queryfq = "recordCtime:[* TO " + cutoff + "]";
        server.deleteByQuery(queryfq);
        server.commit();
    }
}
solr服务化
nssm版本2.24,将nssm解压后,把nssm.exe拷贝至solr的bin目录下,然后cmd运行nssm install solr
在弹出来的对话框输入参数,然后install service
在服务中启动solr服务
solr一些小问题
1.solr时差问题:将数据库中的date字段同步到solr中会有8个小时时差
解决办法:修改bin目录下的solr.cmd文件中
将SOLR_TIMEZONE=UTC改为SOLR_TIMEZONE=UTC+8
2.SOLR清空数据
solr清空数据
1)documents type 选择 XML
2)documents 输入下面语句
<!-- Deletes every document in the core and commits. The two commands are grouped in a single
     update root element so that the message is well-formed XML. -->
<update>
  <delete><query>*:*</query></delete>
  <commit/>
</update>
3.solr同步删除暂时没找到解决办法,只能标记删除,在同步的表中创建isdelete字段标记true或者false来标记删除