下载logstash(最好下载与es相同的版本,这里为了测试下载的低版本)
wget https://download.elastic.co/logstash/logstash/logstash-2.3.4.tar.gz
解压
tar -zxvf logstash-2.3.4.tar.gz
运行测试
# 使用标准输入输出
./logstash-2.3.4/bin/logstash -e 'input { stdin { } } output { stdout {} }'
# 使用标准输入输出,输出格式化为json
./logstash-2.3.4/bin/logstash -e 'input { stdin { } } output { stdout {codec => json} }'
# 加载配置文件启动
./logstash-2.3.4/bin/logstash -f logstash-simple.conf
# 加载多个配置文件启动
./logstash-2.3.4/bin/logstash -f ./conf/*.conf
logstash模式
logstash做的事情分三个阶段依次执行:输入——》处理filter(不是必须)——》输出
logstash配置文件
宏观配置文件格式
# Input section: where events come from
input {
...
}
# Filter section: how events are parsed/transformed (optional)
filter {
...
}
# Output section: where processed events go
output {
...
}
配置文件示例1:
# 参考 https://www.jianshu.com/p/25ed5ed46682
# https://doc.yonyoucloud.com/doc/logstash-best-practice-cn/filter/kv.html
# https://www.cnblogs.com/qq27271609/p/4762562.html
# 日志格式
# [log_time=2018-11-01 15:47:03] [level=ERROR] [app_name=logback_test] [version=1.0.0] [class=com.wk.logbackdemo.LogbackTest] test error
input {
  file {
    # add_field => {"project_name" => "battleship"}
    # tags => "tag1"
    path => ["/home/es-wk/logstash/logs/info/*.log","/home/es-wk/logstash/logs/debug/*.log","/home/es-wk/logstash/logs/warn/*.log","/home/es-wk/logstash/logs/error/*.log"]
    # Quoted: Logstash config string values must be quoted ("beginning" was a bareword)
    start_position => "beginning"
    # Disable sincedb so files are re-read from the start; only effective when
    # set before the first read — later you need a fresh log file. Testing only.
    sincedb_path => "/dev/null"
    # Multiline merge: a log record may span several lines (stack traces etc.).
    # The commented codec below starts a new event whenever a line begins with
    # an ISO8601 timestamp and appends all other lines to the previous event.
    # codec => multiline {
    #   pattern => "^%{TIMESTAMP_ISO8601} "
    #   negate => true
    #   what => previous
    # }
  }
}
# Multiple filter blocks are allowed; they run in order
filter {
  mutate {
    # Strip every leading [ character
    gsub => ["message", "\[", ""]
    # Split message on "] " into an array of "key=value" segments
    split => ["message", "] "]
    # Promote each segment to its own field (merged into a single add_field hash)
    add_field => {
      "log_time" => "%{[message][0]}"
      "level" => "%{[message][1]}"
      "app_name" => "%{[message][2]}"
      "version" => "%{[message][3]}"
      "class" => "%{[message][4]}"
    }
    rename => ["host", "host_name"]
  }
  kv {
    # include_keys => ["log_time", "level", "version", "class"]
    # NOTE(review): field_split is the separator BETWEEN pairs; "=" here equals
    # the default value_split (key/value separator) — verify this is intended
    field_split => "="
  }
  mutate {
    # Keep only the trailing free-text part (6th segment) in message
    replace => {"message" => "%{[message][5]}"}
  }
  # Extract year, month and day from log_time for index naming
  grok {
    match => ["log_time", "(?<YYYY>\d{4})-(?<MM>\d{1,2})-(?<DD>\d{1,2})"]
  }
}
output {
  elasticsearch{
    hosts => ["127.0.0.1:19200"]
    index => "%{app_name}-%{YYYY}-%{MM}"
    user => "elastic"
    # Fixed typo: the Elastic bootstrap default password is "changeme", not "changme"
    password => "changeme"
  }
  # For debugging
  stdout { codec => rubydebug }
}
配置文件示例2:
# 日志格式: 2018-12-14 16:49:21 [INFO] [TxId : Alien_axx21x003^1544669102193^46037 , SpanId : 2452398774611903875] com.dao.base.BaseDao.selectRowValueEqual selectRowValueEqual -->select * from ltemplate_style where template_id = ? and type = ? order by create_time desc
input {
stdin{
}
# beats {
# port => "5044"
# }
# file {
# # add_field => {"project_name" => "battleship"}
# # tags => "tag1"
# path => ["/home/es-wk/logstash/logs/info/*.log","/home/es-wk/logstash/logs/debug/*.log","/home/es-wk/logstash/logs/warn/*.log","/home/es-wk/logstash/logs/error/*.log"]
# start_position => beginning
# sincedb_path => "/dev/null" # re-read from start; only effective when set before the first read — testing only
# # Multiline merge: a record may span lines; the codec below treats a line
# # starting with an ISO8601 timestamp as the beginning of a new event
# codec => multiline {
# pattern => "^%{TIMESTAMP_ISO8601} "
# negate => true
# what => previous
# }
# }
}
# Multiple filter blocks are allowed; they run in order
filter {
# Extract fields with regex (Ruby regex syntax); each grok pulls one field
grok {
match => {"message" => "%{TIMESTAMP_ISO8601:log_time}"}
}
grok {
match => { "message" => "(?<level>(?<=\[)\w+(?=\]))" }
}
grok {
match => {"message" => "(?<tx_id>(?<=\[TxId\s:\s)\S+(?=\s,))"}
}
grok {
match => {"message" => "(?<span_id>(?<=,\sSpanId\s:\s)\S+(?=\s*\]))"}
}
grok {
match => {"message" => "(?<message>(?<=[0-9]\]\s)[\s\S]*(?=$))"}
}
# Keep only the log text in message: the grok above captures into the existing
# "message" field, turning it into [original, captured]; index 1 is the capture
mutate {
replace => {"message" => "%{[message][1]}"}
}
# Extract year, month and day from log_time
grok {
match => ["log_time", "(?<YYYY>\d{4})-(?<MM>\d{1,2})-(?<DD>\d{1,2})"]
}
}
output {
#elasticsearch{
# hosts => ["127.0.0.1:19200"]
# index => "log-%{YYYY}-%{MM}-%{app_name}"
# user => "elastic"
# password => "changeme"
#}
# For debugging
stdout { codec => rubydebug }
}
示例3:(任意数量[k=v]格式日志清洗)
input {
  beats {
    port => 5044
  }
  # stdin{}
}
# nginx log_format reference:
#[log_time=$time_iso8601] [real_ip=$http_x_forwarded_for] [client_ip=$remote_addr] [status=$status] [request_time=$request_time] [uri=$request_uri] [referer=$http_referer][request=$request] [upstream_addr=$upstream_addr] [upstream_status=$upstream_status] [upstream_response_time=$upstream_response_time] [agent=$agent] [request_length=$request_length] [bytes_sent=$bytes_sent]
# docker run --rm -it -v /Users/wk/Desktop/logstash-online.conf:/usr/share/logstash/pipeline/logstash.conf docker.elastic.co/logstash/logstash:6.4.2
# Sample inputs:
# [timestamp=2019-02-28 10:24:05.981] [level=DEBUG] [tx_id=] [span_id=] [bu_id=AXX] [app_id=AXX001] [msg=[xxx=111]] xxx=1111
# [timestamp=2019-03-12 14:13:09.267] [level=WARN] [tx_id=] [span_id=] [bu_id=AXX] [app_id=AXX001] Sync config from upstream redis.basic?ip=131 [Cause: Could not complete get operation [Cause: Read timed out]]
filter {
  # (?!=\s)
  grok {
    # match => {"message" => "(?<message_fmt>((\[[\s\S]+\] )+))"}
    # Non-greedy capture of the leading run of [k=v] groups
    match => {"message" => "(?<message_fmt>((\[[\s\S]+?\] *)+))"}
  }
  kv {
    # Text to kv-parse
    source => "message_fmt"
    # Separator between pairs
    field_split_pattern => "\] "
    # Key prefix
    # prefix => "["
    # Separator between key and value
    value_split => "="
    # Characters trimmed from both ends of keys
    trim_key => "\[\]"
    # Characters trimmed from both ends of values
    trim_value => "\[\]"
    # Do not parse values recursively
    recursive => "false"
    # Do not treat brackets as value boundaries
    include_brackets => "false"
    # false: identical duplicate key/value pairs are de-duplicated instead of
    # accumulating into arrays
    allow_duplicate_values => false
  }
  date {
    match => ["timestamp", "yyyy-MM-dd HH:mm:ss.SSS"]
  }
  mutate {
    lowercase => ["app_id"]
    # remove_field => "message_fmt"
    # Consolidated into one array: dropped the bogus remove_field => "%{}"
    # (an unresolvable sprintf reference — a no-op) and the duplicated
    # "prospector" entry. "timestamp" is dropped after date{} has populated
    # @timestamp; "tags"/"prospector"/"input"/"offset" are filebeat bookkeeping.
    remove_field => ["timestamp", "tags", "prospector", "input", "offset"]
    # remove_field => "host"
  }
}
output {
  # For debugging
  stdout { codec => rubydebug }
}