1. Installing Logstash
# Pull the image
docker pull logstash:7.6.2
# Start logstash
docker run -d --name=logstash logstash:7.6.2
# It will most likely fail:
[root@iZbp18drdmy0c96u5xnc1wZ docker]# docker logs -f logstash
OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release.
OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(0x00000000ca660000, 899284992, 0) failed; error='Not enough space' (errno=12) # not enough memory (the server has 8 GB, not sure why it still fails)
# Remove the failed container (the name is already taken), then limit the heap size, the same way you would for Elasticsearch
docker rm logstash
docker run -d --name=logstash -e LS_JAVA_OPTS="-Xms512m -Xmx1g" logstash:7.6.2 # arrived at by trial and error, but it starts successfully
# Create the directories that will hold the configuration
mkdir -p /data/docker/logstash
mkdir -p /data/docker/logstash/mysql    # holds the JDBC driver jar
mkdir -p /data/docker/logstash/template # holds the MySQL-to-ES dynamic templates
docker cp a2b57db59271:/usr/share/logstash/config /data/docker/logstash   # copy the config files; the part before the colon is the container ID
docker cp a2b57db59271:/usr/share/logstash/pipeline /data/docker/logstash # copy the pipeline files
docker stop <container id> # stop the temporary container, otherwise it cannot be removed
docker rm <container id>   # remove it by container ID
# Edit the configuration (directly on the host, since the container has already been removed)
cd /data/docker/logstash/config
# In jvm.options:
-Xms512m # adjust to your server
-Xmx1g   # adjust to your server
# change -XX:+UseConcMarkSweepGC to -XX:+UseG1GC
# In logstash.yml:
http.host: "0.0.0.0"
xpack.monitoring.elasticsearch.hosts: [ "http://192.168.0.4:9200" ] # point elasticsearch to 192.168.0.4
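Before going further it helps to confirm that Elasticsearch is reachable from this host. A minimal check, assuming ES listens on http://192.168.0.4:9200 as configured above:
# Sanity check: ES should answer with its cluster name and version info
curl http://192.168.0.4:9200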
2. Configuring Logstash to incrementally import MySQL data into ES
# Layout of the mounted Logstash directories
[root@iZbp18drdmy0c96u5xnc1wZ logstash]# ls -R
.:
config mysql pipeline template
./config:
jvm.options log4j2.properties logstash-sample.conf logstash-sample.conf.bk logstash.yml pipelines.yml startup.options
./mysql:
art_id.txt doc_id.txt mysql-connector-java-8.0.23.jar # the two txt files do not need to be created by hand, Logstash generates them after it runs; the mysql directory needs 777 permissions
./pipeline:
logstash.conf
./template:
art-ik.json doc-ik.json
# Download the MySQL JDBC driver jar
https://dev.mysql.com/downloads/connector/j/?os=26 # put the downloaded driver into the mysql folder
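If you would rather script the download, the same driver is also published on Maven Central; the URL below follows the standard repository layout for mysql-connector-java 8.0.23, so double-check it matches the version you actually want:
# Download Connector/J 8.0.23 into the mysql folder (URL assumes the usual Maven Central layout)
cd /data/docker/logstash/mysql
wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.23/mysql-connector-java-8.0.23.jar
chmod -R 777 /data/docker/logstash/mysql # Logstash also writes the *_id.txt tracking files here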
# logstash.conf, placed in the pipeline folder
input {
  stdin {}
  jdbc {
    type => "articles"
    # Database connection string
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/DbBase?characterEncoding=UTF-8&autoReconnect=true"
    # Database credentials
    jdbc_user => "username"
    jdbc_password => "pwd"
    # Path to the MySQL JDBC driver jar
    jdbc_driver_library => "/usr/share/logstash/mysql/mysql-connector-java-8.0.23.jar"
    # Driver class for Connector/J 8.x (the legacy com.mysql.jdbc.Driver still loads, but is deprecated)
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    # Number of connection retry attempts
    connection_retry_attempts => "3"
    # Validate the connection before use (default false)
    jdbc_validate_connection => "true"
    # Connection validation timeout, default 3600 seconds
    jdbc_validation_timeout => "3600"
    # Enable paged queries (default false)
    jdbc_paging_enabled => "true"
    # Rows per page (default 100000; lower it when rows are wide or updated frequently)
    jdbc_page_size => "30"
    # statement is the SQL to run; for complex SQL, point statement_filepath at a .sql file instead.
    # sql_last_value is a built-in variable holding the tracking_column value of the last row from the previous run, here update_time.
    # statement_filepath => "mysql/jdbc.sql"
    statement => "SELECT id,title,create_time,content,update_time,status FROM `scmy_articles` WHERE status=0 and update_time>= :sql_last_value order by create_time desc"
    # Lowercase column names (default true; set to false if serialization relies on the original casing)
    lowercase_column_names => false
    # SQL log level: fatal, error, warn, info or debug (default info)
    sql_log_level => warn
    #
    # Record the last run: the tracking_column value of the last result is saved to last_run_metadata_path
    record_last_run => true
    # Track a column value instead of the run timestamp (default false, which tracks the timestamp)
    use_column_value => true
    # Column used for incremental sync; must be a database column
    tracking_column => "update_time"
    # Tracking column type: numeric or timestamp (default numeric)
    tracking_column_type => timestamp
    # Where the last run value is stored
    last_run_metadata_path => "/usr/share/logstash/mysql/art_id.txt"
    # Whether to wipe the last_run_metadata_path record; must be false for incremental sync
    clean_run => false
    #
    # Cron-style schedule; fields from left to right: minute, hour, day of month, month, day of week
    schedule => "*/5 * * * *" # run every 5 minutes
  }
  jdbc {
    type => "doc"
    # Database connection string
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/DbBase?characterEncoding=UTF-8&autoReconnect=true"
    # Database credentials
    jdbc_user => "username"
    jdbc_password => "pwd"
    # Path to the MySQL JDBC driver jar
    jdbc_driver_library => "/usr/share/logstash/mysql/mysql-connector-java-8.0.23.jar"
    # Driver class for Connector/J 8.x (the legacy com.mysql.jdbc.Driver still loads, but is deprecated)
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    # Number of connection retry attempts
    connection_retry_attempts => "3"
    # Validate the connection before use (default false)
    jdbc_validate_connection => "true"
    # Connection validation timeout, default 3600 seconds
    jdbc_validation_timeout => "3600"
    # Enable paged queries (default false)
    jdbc_paging_enabled => "true"
    # Rows per page (default 100000; lower it when rows are wide or updated frequently)
    jdbc_page_size => "30"
    # statement is the SQL to run; for complex SQL, point statement_filepath at a .sql file instead.
    # sql_last_value is a built-in variable holding the tracking_column value of the last row from the previous run, here update_time.
    # statement_filepath => "mysql/jdbc.sql"
    statement => "SELECT id,name,avatar,department,hospital,expertise,intro,update_time,status FROM scmy_doctors where `status`=0 and update_time>= :sql_last_value order by id desc"
    # Lowercase column names (default true; set to false if serialization relies on the original casing)
    lowercase_column_names => false
    # SQL log level: fatal, error, warn, info or debug (default info)
    sql_log_level => warn
    #
    # Record the last run: the tracking_column value of the last result is saved to last_run_metadata_path
    record_last_run => true
    # Track a column value instead of the run timestamp (default false, which tracks the timestamp)
    use_column_value => true
    # Column used for incremental sync; must be a database column
    tracking_column => "update_time"
    # Tracking column type: numeric or timestamp (default numeric)
    tracking_column_type => timestamp
    # Where the last run value is stored
    last_run_metadata_path => "/usr/share/logstash/mysql/doc_id.txt"
    # Whether to wipe the last_run_metadata_path record; must be false for incremental sync
    clean_run => false
    #
    # Cron-style schedule; fields from left to right: minute, hour, day of month, month, day of week
    schedule => "*/5 * * * *" # run every 5 minutes
  }
}
filter {
  # Parse the message field as JSON when present (only stdin events carry one; jdbc rows do not)
  json {
    source => "message"
    remove_field => ["message"]
  }
  # Strip HTML from the article content: remove script/iframe/style blocks first, then any remaining tags, then leftover spaces
  mutate {
    gsub => [ "content", "<script(.*?)</script>", "" ]
  }
  mutate {
    gsub => [ "content", "<iframe(.*?)</iframe>", "" ]
  }
  mutate {
    gsub => [ "content", "<style(.*?)</style>", "" ]
  }
  mutate {
    gsub => [ "content", "<(.*?)>", "" ]
  }
  mutate {
    gsub => [ "content", " ", "" ]
  }
}
output {
stdout {
codec => json_lines
}
  if [type] == "articles" {
    elasticsearch {
      # host => "127.0.0.1"
      # port => "9200"
      # Elasticsearch address (or list of cluster nodes)
      hosts => "192.168.0.6:9200"
      # Index name; must be lowercase
      index => "art_index"
      # Document type (deprecated in 7.x)
      # document_type => "articles_type"
      # Unique document id (using the database id is recommended)
      document_id => "%{id}"
      # manage_template must be true, otherwise the template / template_overwrite settings below are ignored
      manage_template => true
      # Name the template is installed under (default "logstash"); use distinct names so the two templates do not overwrite each other
      template_name => "art"
      template_overwrite => true
      template => "/usr/share/logstash/template/art-ik.json"
    }
  }
  if [type] == "doc" {
    elasticsearch {
      # host => "127.0.0.1"
      # port => "9200"
      # Elasticsearch address (or list of cluster nodes)
      hosts => "192.168.0.6:9200"
      # Index name; must be lowercase
      index => "doc_index"
      # Document type (deprecated in 7.x)
      # document_type => "articles_type"
      # Unique document id (using the database id is recommended)
      document_id => "%{id}"
      # manage_template must be true, otherwise the template / template_overwrite settings below are ignored
      manage_template => true
      # Name the template is installed under (default "logstash"); use distinct names so the two templates do not overwrite each other
      template_name => "doc"
      template_overwrite => true
      template => "/usr/share/logstash/template/doc-ik.json"
    }
  }
}
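After the first scheduled run you can inspect the bookmark that feeds :sql_last_value; last_run_metadata_path stores the newest update_time seen so far. A quick check on the host (the example timestamp is only illustrative):
# Inspect the incremental-sync bookmark for the articles pipeline
cat /data/docker/logstash/mysql/art_id.txt
# e.g. --- 2021-03-10 08:15:00.000000000 +00:00 (a YAML-serialized timestamp); delete the file to force a full re-import on the next run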
# Index template configuration: put art-ik.json and doc-ik.json into the template folder.
# Note: the "template" pattern must match the index name used in the elasticsearch output of logstash.conf,
# e.g. "art_*" matches art_index and any other index whose name starts with art_.
# art-ik.json also defines a custom ik_html analyzer (html_strip char filter + ik_max_word tokenizer) to remove HTML tags from the article content.
# art-ik.json
{
"order":3,
"version": 70001,
"template": "art_*", #这里需要注意跟logstash.conf outPut的es配置中index匹配,如art_index,以art_开头的索引都会匹配上
"settings": {
"index": {
"refresh_interval": "5s"
},
"analysis": { #自定义了分词器,目的是为了去除文章内容html标签
"analyzer": {
"ik_html": {
"type": "custom",
"char_filter": [
"html_strip"
],
"tokenizer": "ik_max_word",
"filter": [
"lowercase",
"stop",
"snowball"
]
}
}
}
},
"mappings": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"mapping": {
"norms": false,
"type": "text"
},
"match_mapping_type": "string"
}
},
{
"string_fields": {
"mapping": {
"norms": false,
"type": "text",
"analyzer": "ik_html",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"match_mapping_type": "string",
"match": "*"
}
}
],
"properties": {
"@timestamp": {
"type": "date"
},
"geoip": {
"dynamic": true,
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
},
"id": {
"type": "long"
},
"title": {
"norms": false,
"analyzer": "ik_max_word",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"create_time": {
"type": "date"
},
"content": {
"norms": false,
"analyzer": "ik_html",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"update_time": {
"type": "date"
},
"status": {
"type": "integer"
}
}
},
"@version": {
"type": "keyword"
}
}
}
}
# doc-ik.json (same idea; "doc_*" matches doc_index)
{
"order":2,
"version": 70001,
"template": "doc_*", #同理
"settings": {
"index": {
"refresh_interval": "5s"
}
},
"mappings": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"mapping": {
"norms": false,
"type": "text"
},
"match_mapping_type": "string"
}
},
{
"string_fields": {
"mapping": {
"norms": false,
"type": "text",
"analyzer": "ik_max_word",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"match_mapping_type": "string",
"match": "*"
}
}
],
"properties": {
"@timestamp": {
"type": "date"
},
"geoip": {
"dynamic": true,
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
},
"id": {
"type": "long"
},
"name": {
"norms": false,
"analyzer": "ik_max_word",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"avatar": {
"type": "text",
"index": false
},
"department": {
"norms": false,
"analyzer": "ik_max_word",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"hospital": {
"norms": false,
"analyzer": "ik_max_word",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"expertise": {
"norms": false,
"analyzer": "ik_max_word",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"intro": {
"norms": false,
"analyzer": "ik_max_word",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"update_time": {
"type": "date"
},
"status": {
"type": "integer"
}
}
},
"@version": {
"type": "keyword"
}
}
}
}
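To check that the custom ik_html analyzer really strips HTML and segments Chinese text, an ad-hoc _analyze request works well. This assumes the analysis-ik plugin is installed on the ES node at 192.168.0.6:9200 (the address used elsewhere in this post):
# Ad-hoc analyzer test: html_strip char filter + ik_max_word tokenizer, with a short Chinese sample wrapped in HTML
curl -H 'Content-Type: application/json' -X POST 'http://192.168.0.6:9200/_analyze?pretty' -d '
{
  "char_filter": ["html_strip"],
  "tokenizer": "ik_max_word",
  "text": "<p>中文分词测试</p>"
}'
# The returned tokens should contain the segmented words and no HTML tags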
3. Starting the Logstash container
# Create a new container with the config, mysql, template and pipeline directories mounted
docker run -di --name=logstash \
-e LS_JAVA_OPTS="-Xms512m -Xmx512m" \
-v /data/docker/logstash/config:/usr/share/logstash/config \
-v /data/docker/logstash/mysql:/usr/share/logstash/mysql \
-v /data/docker/logstash/template:/usr/share/logstash/template \
-v /data/docker/logstash/pipeline:/usr/share/logstash/pipeline logstash:7.6.2
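Once the container is up, watch the logs for the scheduled jdbc runs and then confirm the indices are receiving documents (addresses as used above):
# Follow the Logstash logs and wait for the first scheduled run (every 5 minutes)
docker logs -f logstash
# Check the indices and look at a sample document
curl 'http://192.168.0.6:9200/_cat/indices/art_index,doc_index?v'
curl 'http://192.168.0.6:9200/art_index/_search?size=1&pretty'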
# Note: if you run into the problem described below when creating the container, create the two index templates first and then create the container.
# In my case the data was imported into ES, but ik word segmentation did not take effect; it only started working after I created the templates in Kibana Dev Tools.
# The custom template files were not applied automatically. Two likely causes: the original config had manage_template => false, which makes Logstash ignore the template and template_overwrite settings entirely, and the original template files contained # comments, which are not valid JSON. With manage_template => true and clean JSON files (as above), the templates should be installed on startup; failing that, create them manually as below.
# Create the art_index template; the request body is the art-ik.json content from section 2
PUT _template/art
{ ...art-ik.json body as shown above... }
# Create the doc_index template; the request body is the doc-ik.json content from section 2
PUT _template/doc
{ ...doc-ik.json body as shown above... }
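Whichever way the templates were installed (by Logstash or manually), you can confirm they exist and see which index patterns they cover:
# List the installed templates (curl, or GET _template/art,doc in Kibana Dev Tools)
curl 'http://192.168.0.6:9200/_template/art,doc?pretty'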