Logstash作用
- 数据采集
- 以id或update_time作为同步边界 一般用update_time id不支持修改
- logstash-input-jdbc插件 新版本自带 版本号要和elasticsearch保持同步
- 预先创建索引
安装
- jdk必须要配置
- 需要mysql驱动包 mysql-connector-java-5.1.30.jar maven安装包路径下mysql下的mysql-connector-java文件夹里有
- 文件上传到服务器 解压
- mv logstash-7.12.0 /usr/local/
- cd /usr/local/logstash-7.12.0
- mkdir sync cd sync/
- vim logstash-db-sync.conf
- 把mysql的驱动上传到这个目录
- vim work.sql
- 如果出现Expected one of [ \t\r\n], “#”, 这种类型的错误 请检查文件格式 是否缺少括号 或者格式是否正确
- 到bin目录下启动 ./logstash -f /usr/local/logstash-7.12.0/sync/logstash-db-sync.conf
- 检测配置文件是否合法./logstash -f /usr/local/logstash-7.12.0/sync/logstash-db-sync.conf -t
- work.sql 文件
-- work.sql — incremental sync query executed by the logstash jdbc input plugin.
-- :sql_last_value is substituted by Logstash with the last recorded value of the
-- tracking column (updated_time), so each run fetches only rows changed since then.
SELECT
i.id as id,
i.item_name as itemName,
i.sell_counts as sellCounts,
i.updated_time as updated_time,
ii.url as imgUrl,
temp.price_discount as price
from items i
-- join the item's main image (filtered to is_main = 1 in the WHERE clause)
LEFT JOIN items_img ii on ii.item_id = i.id
-- lowest discounted price across all specs of the item
LEFT JOIN
(SELECT item_id,MIN(price_discount) as price_discount FROM items_spec
GROUP BY item_id) temp on temp.item_id = i.id
WHERE ii.is_main = 1
and i.updated_time >= :sql_last_value
- logstash-db-sync.conf
# logstash-db-sync.conf — pulls changed rows from MySQL via JDBC and indexes them
# into Elasticsearch; runs on the cron schedule below.
input {
jdbc {
# MySQL connection string; serverTimezone=UTC avoids time-zone errors with newer JDBC drivers
jdbc_connection_string => "jdbc:mysql://192.168.0.198:3306/food?characterEncoding=UTF-8&useSSL=false&useUnicode=true&serverTimezone=UTC"
jdbc_user => "root"
jdbc_password => "root"
# MySQL JDBC driver jar uploaded into the sync directory
jdbc_driver_library => "/usr/local/logstash-7.12.0/sync/mysql-connector-java-5.1.30.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
# enable paging of the query result set
jdbc_paging_enabled => true
# rows fetched per page
jdbc_page_size => "10000"
# path to the SQL statement file executed on every run
statement_filepath => "/usr/local/logstash-7.12.0/sync/work.sql"
# cron-style schedule: minute hour day-of-month month day-of-week; "* * * * *" = run every minute
schedule => "* * * * *"
# document type; NOTE(review): mapping types are deprecated in ES 7.x — confirm this is still needed
type => "_doc"
# use the tracking column's value (not the job's run time) as :sql_last_value;
# the last seen value is persisted to the last_run_metadata_path file below
use_column_value => true
last_run_metadata_path => "/usr/local/logstash-7.12.0/sync/track_time"
# column whose value marks the incremental-sync boundary
tracking_column => "updated_time"
tracking_column_type => "timestamp"
# when true, the stored :sql_last_value is discarded and sync restarts from scratch
clean_run => false
# keep column names exactly as the query aliases them (preserves camelCase like itemName)
lowercase_column_names => false
}
}
output {
elasticsearch {
# Elasticsearch address
hosts => ["192.168.0.199:9200"]
# index must be created in advance
index => "items"
# use the row's id as the document id so re-syncs update rather than duplicate
document_id => "%{id}"
}
# also print each event to stdout as JSON lines (debugging aid)
stdout {
codec => json_lines
}
}
-
上面的方式默认分词方式不是中文 设置IK分词器 需要如下配置
-
在sync目录下 vim logstash-ik.json
{
"order":0,
"version":1,
"index_patterns":[
"*"
],
"settings":{
"index":{
"number_of_shards":"1",
"refresh_interval":"5s"
}
},
"mappings":{
"dynamic_templates":[
{
"message_field":{
"path_match":"message",
"mapping":{
"norms":false,
"type":"text"
},
"match_mapping_type":"string"
}
},
{
"string_fields":{
"mapping":{
"norms":false,
"type":"text",
"analyzer":"ik_max_word",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"match_mapping_type":"string",
"match":"*"
}
}
],
"properties":{
"@timestamp":{
"type":"date"
},
"geoip":{
"dynamic":true,
"properties":{
"ip":{
"type":"ip"
},
"latitude":{
"type":"half_float"
},
"location":{
"type":"geo_point"
},
"longitude":{
"type":"half_float"
}
}
},
"@version":{
"type":"keyword"
}
}
},
"aliases":{
}
}
- 在logstash-db-sync.conf 的 output标签下追加即可
# output section extended with a custom index template so string fields are
# analyzed with the IK Chinese analyzer (template defined in logstash-ik.json).
output {
elasticsearch {
# Elasticsearch address
hosts => ["192.168.0.199:9200"]
# index must be created in advance
index => "items"
# use the row's id as the document id so re-syncs update rather than duplicate
document_id => "%{id}"
# name under which the template is registered in Elasticsearch
template_name => "myik"
# path to the local template file
template => "/usr/local/logstash-7.12.0/sync/logstash-ik.json"
# overwrite an existing template of the same name
template_overwrite => true
# defaults to true; set to false when supplying a custom template
manage_template => false
}
# also print each event to stdout as JSON lines (debugging aid)
stdout {
codec => json_lines
}
}