背景
在logstash向es传输数据时,需要根据数据文件配置指定pipeline采集数据
实例
logstash的pipeline文件配置
input {
# input中可以存在多个file输入
file {
# 指定日志数据的来源
path => "/var/log/iotplatform/audit/*/*.log"
# sincedb_path 记录日志文件的读取位置,logstash重启后根据文件内的记录继续读取数据
sincedb_path => "{{logstash_conf_dir}}/logstash-iotplatform"
# start_position 指定logstash对首次发现的文件是否从开头读取,配置为beginning时从文件开头读取(sincedb中已有记录的文件仍按记录位置继续读取)
#start_position => beginning
# 为读取的文件添加type类型
type => "auditlog"
# 通配日期开头的文件内容,当不符合时,默认向上合并行
codec => multiline {
# Grok pattern names are valid! :)
pattern => "(?:^|\n)\[*%{TIMESTAMP_ISO8601}\]*"
negate => true
what => previous
auto_flush_interval => 1
}
}
file {
path => "/var/log/iotplatform/monitor/*/*.log"
sincedb_path => "{{logstash_conf_dir}}/logstash-iotplatform"
#start_position => beginning
type => "monitorlog"
codec => multiline {
# Grok pattern names are valid! :)
pattern => "(?:^|\n)\[*%{TIMESTAMP_ISO8601}\]*"
negate => true
what => previous
auto_flush_interval => 1
}
}
}
filter {
# 通过mutate将字符串分隔,添加字段
mutate {
split => {"message" => "|"}
add_field => { "date" => "%{[message][0]}" }
add_field => { "platform" => "%{[message][1]}" }
add_field => { "product" => "%{[message][2]}" }
add_field => { "module" => "%{[message][3]}" }
add_field => { "hostAndPort" => "%{[message][4]}" }
add_field => { "ip" => "%{[message][5]}" }
add_field => { "region" => "%{[message][6]}" }
add_field => { "logType" => "%{[message][7]}" }
add_field => { "logLevel" => "%{[message][8]}" }
add_field => { "username" => "%{[message][9]}" }
add_field => { "token" => "%{[message][10]}" }
add_field => { "uri" => "%{[message][11]}" }
add_field => { "httpMethod" => "%{[message][12]}" }
add_field => { "resultCode" => "%{[message][13]}" }
add_field => { "action" => "%{[message][14]}" }
add_field => { "extension" => "%{[message][15]}" }
add_field => { "content" => "%{[message][16]}" }
}
# 删除message字段
mutate{
remove_field => ["message"]
# 将resultCode转化为integer类型
convert => { "resultCode" => "integer" }
}
# 通过grok从date字段中提取时间戳,存入logdate字段
grok{
match => [
"date", "%{TIMESTAMP_ISO8601:logdate}"
]
}
# 定义logdate的时间格式
date {
match => ["logdate", "yyyy-MM-dd HH:mm:ss,SSS"]
}
}
output {
# 通过file中定义的type类型,输出到指定的索引中
if [type] == "auditlog" {
elasticsearch {
# es的地址,例:[localhost:9200,localhost2:9200]
hosts => [{{elastic_discovery_seed_hosts}}]
# 输入的索引名称,-%{+YYYY-MM-dd}按天自动建立索引
index => "iotplatform-audit-%{+YYYY-MM-dd}"
# 是否使用模板创建索引,在模板中可提前定义索引的字段类型
manage_template => true
# 索引模板文件
template => "{{logstash_conf_dir}}/iotplatform_template.json"
# 模板在es中定义的名称,可自定义,不必与文件名称相同
template_name => "iotplatform_template"
# 在logstash重启后,是否使用模板文件覆盖es中已存在的索引模板
template_overwrite =>"true"
# 发生版本冲突时(update操作)内部的重试次数
retry_on_conflict => 5
}
}
if [type] == "monitorlog" {
elasticsearch {
hosts => [{{elastic_discovery_seed_hosts}}]
index => "iotplatform-monitor-%{+YYYY-MM-dd}"
manage_template => true
template => "{{logstash_conf_dir}}/iotplatform_template.json"
template_name => "iotplatform_template"
template_overwrite =>"true"
retry_on_conflict => 5
}
}
}
es的模板示例:
{
# 模板通配的索引名称,在创建以iotplatform开头的索引(如iotplatform-audit-*、iotplatform-monitor-*)时使用该模板
"index_patterns" : "iotplatform*",
# 自定义配置
"settings" : {
"index" : {
# 搜索返回结果数最大10000条数据设置
"max_result_window" : "10000",
# 索引刷新时间,默认1s
"refresh_interval" : "1s",
# 指索引要做多少个分片,只能在创建索引时指定,后期无法修改
"number_of_shards" : "1",
# 每个分片有多少个副本,后期可以动态修改
"number_of_replicas" : "0",
"blocks" : {
# 索引是否开启只读
"read_only_allow_delete" : "false"
}
}
},
# 索引字段映射
# 在索引创建完成后,无法直接更新字段,只能通过重建索引来更新
"mappings" : {
"properties" : {
"date" : {
"type" : "text"
},
"logType" : {
"type" : "keyword"
},
"product" : {
"type" : "keyword"
},
"extension" : {
"type" : "text"
},
"module" : {
"type" : "keyword"
},
"ip" : {
"type" : "keyword"
},
"hostAndPort" : {
"type" : "keyword"
},
"resultCode" : {
"type" : "integer"
},
"httpMethod" : {
"type" : "keyword"
},
"uri" : {
"type" : "keyword"
},
"platform" : {
"type" : "keyword"
},
"content" : {
"type" : "text"
},
"token" : {
"type" : "keyword"
},
"times" : {
"type" : "keyword"
},
"logLevel" : {
"type" : "keyword"
},
"action" : {
"type" : "keyword"
},
"region" : {
"type" : "keyword"
},
"username" : {
"type" : "keyword"
}
}
}
}
扩展pipeline.yml
存在多个pipeline文件时,可以将它们统一配置在pipeline.yml文件中,logstash启动时即可同时启动多个pipeline实例
# - pipeline.id: test
# pipeline.workers: 1
# pipeline.batch.size: 125
# pipeline.batch.delay: 50
# path.config: "/tmp/logstash/*.config"
pipeline.id 管道名称,必须唯一
pipeline.workers 处理线程数(用于Filter和Output阶段)
pipeline.batch.size 每个线程批处理大小
pipeline.batch.delay 每个线程批处理事件最大等待时间(默认50ms)
path.config 管道引用的配置文件
注:可使用config.string直接配置,例:config.string: "input { generator {} } filter { sleep { time => 1 } } output { stdout { codec => dots } }"
实例:
- pipeline.id: mylostash1
path.config: "{{logstash_conf_dir}}/{{logstash_conf_name}}"
- pipeline.id: mylostash2
path.config: "{{logstash_conf_dir}}/{{logstash_dt_conf_name}}"
注:管道定义项前不能存在多余空格,即每个 - pipeline.id 行必须顶格书写