logstash同步es

最新推荐文章于 2024-09-16 00:29:15 发布

KingFu28

最新推荐文章于 2024-09-16 00:29:15 发布

阅读量429

点赞数

文章标签： elasticsearch 数据库 postgresql

本文链接：https://blog.csdn.net/kingwin28/article/details/128955851

版权

文章描述了一种从PostgreSQL数据库同步数据到Elasticsearch7的方法，特别是处理一对多关系时采用nested索引。通过Logstash进行数据聚合和转换，然后将结果存储到Elasticsearch的nested类型的索引中。设置包括删除并重建索引以实现全量更新，以及配置Logstash的jdbc输入插件和Elasticsearch输出。

摘要由CSDN通过智能技术生成

此例为从 pg 数据库同步到 es7，采用了nested方式实现一对多数据同步
注意：需要先建 nested 索引，logstash 自己建的类型不是 nested 的。
如需要全量更新，可通过 sh/bat 脚本先删除索引、再重新创建索引，然后重新执行一次同步即可。
删除索引命令 postman：delete http://192.168.30.32:9200/nested_test_index/?master_timeout=55m
------------------ sql ------------------
-- One row per (project, project user) pair for projects whose is_delete flag is false.
-- The LEFT JOIN keeps projects without users (user_id is NULL for them).
-- Rows are ordered by project id so that all rows of one project arrive consecutively,
-- which the Logstash aggregate filter (push_previous_map_as_event) depends on.
SELECT
    zp.*,
    zpu.user_id
FROM zs_project zp
LEFT JOIN zs_project_user zpu
    ON zp.id = zpu.project_id
WHERE zp.is_delete = false
ORDER BY zp.id DESC
------------------ ES索引 postman json PUT ------------------
http://192.168.30.32:9200/nested_test_index?include_type_name=true
{
  "settings": {
    "analysis": {
      "analyzer": {
        "underlined": {
          "type": "pattern",
          "pattern": "_|\\s|-"
        },
        "case_insensitive_analyzer": {
          "type": "custom",
          "filter": [
            "lowercase"
          ],
          "tokenizer": "keyword"
        },
        "char_analyzer": {
          "type": "custom",
          "tokenizer": "char_tokenizer"
        }
      },
      "tokenizer": {
        "char_tokenizer": {
          "type": "pattern",
          "pattern": "|"
        }
      }
    },
    "index": {
      "refresh_interval": "30s",
      "number_of_shards": "9",
      "max_result_window": 10000000
    },
    "number_of_replicas": 2
  },
  "mappings": {
    // Type-name level ("project"): needed because the request is sent with include_type_name=true.
    "project": {
      "properties": {
        "id": {
          "type": "keyword"
        },
        "project_name": {
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_max_word",
          "type": "text"
        },
        "projectUsers": {
          "type": "nested",
          "properties": {
            "user_id": {
              "type": "keyword"
            }
          }
        }
      }
    }
  }
}

------------------ logstash.conf ------------------
input {
  stdin {}
  # Reads the rows to index from PostgreSQL through the JDBC input plugin.
  jdbc {
    jdbc_driver_library => "D:\code\logstash-8.1.1\lib\postgresql-42.2.20.jar"
    jdbc_driver_class => "org.postgresql.Driver"
    jdbc_connection_string => "jdbc:postgresql://192.168.30.32:5432/zsdev1_3?useUnicode=true&characterEncoding=utf-8&allowMultiQueries=true"
    jdbc_user => "postgres"
    jdbc_password => "root"
    #jdbc_paging_enabled => "true"
    #jdbc_page_size => "50"
    # Whether to discard the state stored in last_run_metadata_path; when true, every run
    # starts from scratch and queries all database records again.
    clean_run => true
    # Must be true to track a column value; otherwise the timestamp of the last run is tracked.
    use_column_value => true
    # Whether to record the last run: when true, the last seen value of tracking_column is
    # saved to the file named by last_run_metadata_path.
    record_last_run => "true"
    # Required when use_column_value is true: the name of a column returned by the query.
    # The column must be increasing, e.g. a record creation time.
    tracking_column => "create_time"
    # NOTE(review): tracking_column_type defaults to "numeric"; if create_time is a
    # timestamp column, enable the next line - confirm against the table definition.
    #tracking_column_type => "timestamp"
    #schedule => "*/1 * * * *"
    # For incremental sync, add `WHERE my_id > :sql_last_value` to the SQL statement;
    # :sql_last_value is replaced with the value stored in that file.
    #last_run_metadata_path => "/Users/menglinjie/ES-node/testdata.text"
    # Defaults to true; turned off because field names are case-sensitive
    # (strictly speaking in ES rather than in Kibana).
    lowercase_column_names => false
    statement_filepath => "D:\code\logstash-8.1.1\sql\pgsql\nested-test.sql"
    #schedule => "* * * * *"
    type => "jdbc"
    jdbc_default_timezone => "Asia/Shanghai"
  }
}

filter {
  # Aggregation: collapse the joined rows that share one project id into a single event
  # whose projectUsers array holds one object per distinct user_id.
  aggregate {
    task_id => "%{id}"
    code => "
      map['id'] = event.get('id')
      map['project_name'] = event.get('project_name')
      map['projectUser_list'] ||= []
      map['projectUsers'] ||= []
      # skip rows that carry no user (user_id is nil)
      if (event.get('user_id') != nil)
        # de-duplicate here; this could also be done in the SQL statement
        if !(map['projectUser_list'].include? event.get('user_id'))
          map['projectUser_list'] << event.get('user_id')
          map['projectUsers'] << {
            'user_id' => event.get('user_id')
          }
        end
      end
      # drop the per-row event; only the aggregated map is indexed
      event.cancel()
    "
    # Emit the accumulated map as a new event when a row with a different task_id arrives.
    push_previous_map_as_event => true
    # Seconds after which a pending map is flushed (needed to push the last project).
    timeout => 7
  }

  #json {
  #source => "message"
  #remove_field => ["message"]
  #remove_field => ["message", "type", "@timestamp", "@version"]
  #}

  mutate {
    # Remove fields that are not needed so they are not stored in ES.
    remove_field => ["tags", "@timestamp", "@version"]
  }
}

output {
  # Echo every event to the console.
  stdout {
    #codec => json_lines
  }
  # Index the aggregated events into the pre-created nested index.
  elasticsearch {
    hosts => ["192.168.30.32:9200"]
    user => "elastic"
    password => "zsdm2022"
    index => "nested_test_index"
    # template => "D:\code\logstash-8.1.1\es-template\fuzzy.json"
    # template_name => "t-statistic-out-logstash"
    # template_overwrite => true
    # document_type => "out"
    # document_id => "%{id}"
  }
}

------------------ 启动logstash ------------------
logstash -w 1 -f D:\code\logstash-8.1.1\config\logstash.conf

------------------ 部分搜索结果 ------------------
{
  "_index": "nested_test_index",
  "_type": "project",
  "_id": "V2OVAYABj0z2Mqzx95Zd",
  "_score": 1.0,
  "_source": {
    "project_name": "grace",
    "projectUsers": [
      {
        "user_id": 932935637266714624
      },
      {
        "user_id": 588689164551917652
      }
    ],
    "id": 955754738379034624,
    "projectUser_list": [
      932935637266714624,
      588689164551917652
    ]
  }
}