说明:以下文档说到elasticsearch都简称es。
logstash同步数据不仅仅是数据库到es,它只是一个同步中间件,数据来源和数据的目标存储都是可以在配置里面指定的,根据数据来源和目标存储的不同配合logstash提供的不同插件。
本文讲解的是从mysql 同步到es实现方式。使用的版本logstash版本是6.6.1,es版本是6.5.4,logstash版本和es版本一定要配套,如果2者版本差距过大,同步过程中会报版本错误。
写这篇文章的时候es和Logstash最新版本是7.2。
1、logstash是一款开源的数据同步工具,还是比较优秀的。其他几款同步es数据的工具,比如elasticseach-jdbc等都试过,好久没人维护了。
官方文档访问地址:https://www.elastic.co/guide/en/logstash/current/index.html
2、es是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。
官方说明文档访问地址:https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html
3、logstash和es的安装
参考官方文档。
4、mysql同步到es的logstash配置文件,命名一个logstash-mysql-es.conf配置文件,其中ip是mysql数据库对应的ip,es_ip是elasticsearch对应的ip内容如下:
jdbc {
type1 => "kl_carousel_info"
jdbc_connection_string2 => "jdbc:mysql://ip:3306/db_name?useUnicode=true&characterEncoding=utf-8&useSSL=false"
jdbc_user3 => "root"
jdbc_password4 => "123456"
jdbc_driver_library5 => "./config/mysql-connector-java-5.1.38.jar"
jdbc_driver_class6 => "com.mysql.jdbc.Driver"
jdbc_paging_enabled7 => true
jdbc_fetch_size8 => 100
jdbc_page_size9 => 100000
jdbc_default_timezone10 =>"Asia/Shanghai"
statement11 => "select * from kl_carousel_info where createtime >= :sql_last_value order by createtime asc"
schedule12 => "*/3 * * * *"
record_last_run13 => true
use_column_value14 => true
tracking_column15 => "createtime"
tracking_column_type16 => "numeric"
last_run_metadata_path17 => "./id/logstash_kl_carousel_info_last_id"
clean_run18 => false
lowercase_column_names19 => false
}
jdbc {
type => "kl_knowledge_article"
jdbc_connection_string => "jdbc:mysql://ip:3306/db_name?useUnicode=true&characterEncoding=utf-8&useSSL=false"
jdbc_user => "root"
jdbc_password => "123456"
jdbc_driver_library => "./config/mysql-connector-java-5.1.38.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_paging_enabled => true
jdbc_fetch_size => 100
jdbc_page_size => 100000
jdbc_default_timezone =>"Asia/Shanghai"
statement => "SELECT 'kl_infomation_article' table_name,a.`title` m_title,a.pinyin,a.`type` m_type,a.`knowledge_type_id`,a.`knowledge_type_name`,a.`tags`,a.status,a.`click_rate`, a.id m_id,c.`organization_id`,c.`organization_name`,b.`id`,b.`masterid`,b.`content`,b.`accessory_id`,b.`accessory_path`,b.`state`,b.`sort`,b.`updatetime`,b.`createtime` FROM `kl_infomation_article` b INNER JOIN kl_infomation_master a on a.id=b.masterid INNER JOIN `kl_master_to_organization` c ON a.`id`=c.`master_id` WHERE b.`state`='1' AND a.`state`='1' AND c.`state`='1' AND b.updatetime >= :sql_last_value order by b.updatetime asc"
schedule => "*/3 * * * *"
record_last_run => true
use_column_value => true
tracking_column => "updatetime"
tracking_column_type => "numeric"
last_run_metadata_path => "./id/logstash_kl_knowledge_article_last_id"
clean_run => false
lowercase_column_names => false
}
jdbc {
type => "kl_knowledge_document"
jdbc_connection_string => "jdbc:mysql://ip:3306/db_name?useUnicode=true&characterEncoding=utf-8&useSSL=false"
jdbc_user => "root"
jdbc_password => "123456"
jdbc_driver_library => "./config/mysql-connector-java-5.1.38.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_paging_enabled => true
jdbc_fetch_size => 100
jdbc_page_size => 100000
jdbc_default_timezone =>"Asia/Shanghai"
statement => "SELECT 'kl_infomation_document' table_name,a.`title` m_title,a.pinyin,a.`type` m_type,a.`knowledge_type_id`,a.`knowledge_type_name`,a.`tags`,a.status,a.`click_rate`,a.id m_id,c.`organization_id`,c.`organization_name`, b.* from `kl_infomation_document` b INNER JOIN kl_infomation_master a on a.id=b.master_id INNER JOIN `kl_master_to_organization` c ON a.`id`=c.`master_id` WHERE b.`state`='1' AND a.`state`='1' AND c.`state`='1' AND b.updatetime >= :sql_last_value order by b.updatetime asc"
schedule => "*/3 * * * *"
record_last_run => true
use_column_value => true
tracking_column => "updatetime"
tracking_column_type => "numeric"
last_run_metadata_path => "./id/logstash_kl_knowledge_document_last_id"
clean_run => false
lowercase_column_names => false
}
}
if[type]=="kl_carousel_info"20{
elasticsearch {
hosts21 => "es_ip:9200"
index22 => "kl_carousel_info"
document_type23 => "doc"
document_id24 => "%{id}"
manage_template25 => true
template_overwrite26 => true
template_name27 => "kl_carousel_info"
template28 => "/opt/elasticsearch/logstash-6.6.1/template/kl_carousel_info_logstash.json"
}
}
if[type]=="kl_knowledge_article"{
elasticsearch {
hosts => "es_ip:9200"
index => "kl_knowledge"
document_type => "doc"
document_id => "%{id}"
manage_template => true
template_overwrite => true
template_name => "kl_knowledge"
template => "/opt/elasticsearch/logstash-6.6.1/template/kl_knowledge_logstash.json"
}
}
if[type]=="kl_knowledge_document"{
elasticsearch {
hosts => "es_ip:9200"
index => "kl_knowledge"
document_type => "doc"
document_id => "%{id}"
manage_template => true
template_overwrite => true
template_name => "kl_knowledge"
template => "/opt/elasticsearch/logstash-6.6.1/template/kl_knowledge_logstash.json"
}
}
stdout {
codec => json_lines
}
}
"template": "kl_*"1,
"order" : 12,
"settings": {
"index.number_of_shards": 5,
"number_of_replicas": 1,
"index.refresh_interval": "60s",
"analysis":{
"analyzer":{
"pinyin_smart"3:{
"type":"custom",
"tokenizer":"ik_smart"4,
"char_filter": ["html_strip"]5,
"filter":[
"my_pinyin"6
]
}
},
"filter":{
"my_pinyin"7:{
"type":"pinyin"8,
"keep_separate_first_letter" : false,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : false,
"remove_duplicated_term" : true
}
}
}
},
"mappings":
{
"doc": {
"dynamic_templates"9: [ {
"string_fields" : {
"match" : "*"10,
"match_mapping_type" : "string"11,
"mapping" : {
"analyzer":"pinyin_smart"12,
"type" : "text"13,
"fields": {
"raw": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
],
"properties"14: {
"@timestamp":{
"type": "date"
},
"id": {
"type": "keyword"
},
"m_id": {
"type": "keyword"
},
"masterid": {
"type": "keyword"
},
"master_id": {
"type": "keyword"
},
"accessory_id": {
"type": "keyword"
},
"parent_id": {
"type": "keyword"
},
"knowledge_type_id": {
"type": "keyword"
},
"organization_id": {
"type": "keyword"
},
"table_name": {
"type": "keyword"
},
"m_type": {
"type": "keyword"
},
"m_title"15: {
"type": "text",
"norms": true16,
"analyzer":"ik_smart"17,
"search_analyzer":"ik_smart"18,
"fields": {
"pinyin"19: {
"type": "text",
"analyzer": "pinyin_smart"
},
"raw"20: {
"ignore_above":25621,
"type": "keyword"22
}
}
},
"title": {
"type": "text",
"norms": true,
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields": {
"pinyin": {
"type": "text",
"analyzer": "pinyin_smart"
},
"raw": {
"ignore_above":256,
"type": "keyword"
}
}
},
"img_title": {
"type": "text",
"norms": true,
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields": {
"pinyin": {
"type": "text",
"analyzer": "pinyin_smart"
},
"raw": {
"ignore_above":256,
"type": "keyword"
}
}
},
"content": {
"type": "text",
"norms": true,
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields": {
"pinyin": {
"type": "text",
"analyzer": "pinyin_smart"
},
"raw": {
"ignore_above":256,
"type": "keyword"
}
}
}
}
}
}
}