# The configuration below is NOT runnable as-is; it is only a reference for the format and configuration flow! (A summary of commonly used plugins.)
# -----------------------------------------------------------
# Set a unique document ID in Filebeat to prevent duplicate data (a hash fingerprint computed from several fields)
# processors:
# - fingerprint:
# fields: ["field1", "field2"]
# target_field: "@metadata._id"
# -----------------------------------------------------------
input {
# When consuming from Kafka, the total number of consumer threads across the Logstash instances in the same consumer group must match the topic's total partition count!
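# A minimal kafka input sketch (broker, topic and group names are placeholders): e.g. a topic with
# 10 partitions consumed by 2 Logstash instances gives consumer_threads => 5 on each instance.
# kafka {
#   bootstrap_servers => "kafka1:9092,kafka2:9092"
#   topics => ["app_logs"]
#   group_id => "logstash_app_logs"
#   consumer_threads => 5
#   codec => json
# }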
tcp {
port => 666
codec => plain {
charset => "UTF-8" # charset => "GBK"
}
}
file {
codec => plain {
charset => "UTF-8"
}
# codec => multiline {
# pattern => "<?xml version" # regex pattern used to match lines for the multiline codec
# negate => "true" # when true, lines that do NOT match the pattern are treated as continuation lines
# what => "previous" # continuation lines are appended to the previous line
# }
path => "/tmp/**/*.log" # glob pattern; ** matches subdirectories recursively
sincedb_path => "/dev/null" # do not persist read positions (sincedb); for testing, so files are re-read every run
discover_interval => 1
start_position => "beginning"
# type => "xml"
}
generator { # generate test data; for testing only
count => 10000000 # how many events to generate
message => '{"key1":"value1","key2":[1,2],"key3":{"subkey1":"subvalue1"}}' # the generator input uses the "message" option for the event payload
codec => json
}
}
filter {
grok {
id => "grok_1" # 命名 ID 有助于在使用监控 API 时监控 Logstash
match => { # 多项匹配
"req_field" => [
"%{DATA:hostname}\|%{DATA:tag}\|%{DATA:types}\|%{DATA:uid}\|%{GREEDYDATA:msg}",
"%{DATA:hostname}\|%{DATA:tag}\|%{GREEDYDATA:msg}"
]
}
overwrite => ["req_field"] # overwrite req_field with the captured value on a successful match
# target => "parse_json" # target namespace to place the captures under; this setting has no default
keep_empty_captures => false # when true, empty captures are still kept as (empty) event fields
add_field => { "foo_%{field}" => "%{host}" } # add arbitrary fields to the event if the match succeeds
remove_field => ["foo_%{field}","type"] # remove fields from the event
tag_on_failure => ["_grokparsefailure"] # the default failure tag
patterns_dir => ["/etc/logstash/pattern_dir/patterns"]
pattern_definitions => {
"APACHE_TIME" => "%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}"
}
}
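# Sketch of a custom pattern file placed under patterns_dir (one "NAME regex" pair per line; the entries are illustrative):
#   POSTFIX_QUEUEID [0-9A-F]{10,11}
#   MY_UID [a-f0-9]{32}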
useragent {
source => "[apache2][access][agent]" # 对req_field里的哪个字段进行分析
target => "[apache2][access][user_agent]" # 将分析出的user agent信息以列表形式存放在指定字段
remove_field => "[apache2][access][agent]" #
}
urldecode {
field => "req_field" # decode URL-encoded (percent-encoded) content back into plain text
}
# Look up a specific field of the incoming data against an in-config dictionary or an external lookup file; when the field
# matches an entry, take an action such as adding a field to the event or sending an email (see the YAML sketch after this block).
translate {
field => "response_code" # 针对哪个字段进行处理
destination => "http_response" # 处理后写入哪个字段
regex => false # 是否将字典的键视为正则
dictionary => { # 匹配为200状态码时,写入字段的内容 ...
"200" => "OK"
"403" => "Forbidden"
"404" => "Not Found"
"408" => "Request Timeout"
}
# dictionary_path => '/tmp/blacklisted_ip.yaml'
remove_field => "response_code"
}
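# Sketch of what a dictionary_path YAML file may look like (entries are illustrative):
#   "10.1.1.1": "blacklisted"
#   "192.168.1.50": "blacklisted"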
# Parse a timestamp out of a field
date {
match => ["[creatime]", # 时间字段
"yyyy-MM-dd HH:mm:ss", #
"yyyy-MM-dd HH:mm:ss Z", #
"MMM dd, yyyy HH:mm:ss aa", # Oct 16, 2020 11:59:53 PM
"yyyy-MM-dd HH:mm:ss.SSS", #
"ISO8601", # 2018-06-17T03:44:01.103Z ( Z 后面可以有 "08:00" 也可以没 )
"UNIX", # UNIX时间戳格式记录的是从 1970 年起始至今的总秒数
"UNIX_MS" # 从 1970 年起始至今的总毫秒数
]
target => "@timestamp"
timezone => "Asia/Shanghai"
}
geoip { # http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz
database => "/opt/mdstack/share/geo/GeoLite2-City.mmdb"
source => "src_ipv4" # Tips: Mapping 类型 geoip.location 需改为: geo_point
fields => ["country_name", "region_name", "city_name", "latitude", "longitude"]
}
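# Matching index-template mapping sketch for the geo_point tip above (only relevant if the location field is emitted):
#   "geoip": { "properties": { "location": { "type": "geo_point" } } }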
# Inspect the info content and add a corresponding key:value describing the kind of error
if [loglevel] == "ERROR" and [className] == "db" {
if "insert" in [info] {
mutate { add_field => { "reason" => "error about insert"}}
} else if "update" in [info] {
mutate { add_field => { "reason" => "error about update" }}
} else {
mutate { add_field => { "reason" => "Non SQL error" }}
}
}
if ![ErrorMethod] {
mutate {
add_field => { "ErrorMethod" => "Non ErrorMethod" }
}
}
mutate {
# NOTE: operations inside a single mutate run in the plugin's fixed internal order, not the order written here;
# use separate mutate blocks when the execution order matters.
gsub => ["info","\r",""] # regex substitution (strip carriage returns)
add_field => { "level" => "%{loglevel}" } # add a field
remove_field => ["req_field","creatime"] # remove fields (common options such as remove_field are applied after the mutate operations)
convert => { "reqTime" => "integer" "bytes" => "integer" } # type conversion
split => { "req_field" => " || " } # split the field content into an array on a delimiter
add_field => {
"time_local" => "%{[req_field][0]}"
"remote_addr" => "%{[req_field][1]}"
"upstream_addr" => "%{[req_field][2]}"
"status" => "%{[req_field][3]}"
"request_time" => "%{[req_field][4]}"
"upstream_status" => "%{[req_field][5]}"
"upstream_response_time" => "%{[req_field][6]}"
"upstream_cache_status" => "%{[req_field][7]}"
"body_bytes_sent" => "%{[req_field][8]}"
"http_referer" => "%{[req_field][9]}"
"remote_user" => "%{[req_field][10]}"
"http_user_agent" => "%{[req_field][11]}"
"http_x_forwarded_for" => "%{[req_field][12]}"
"request" => "%{[req_field][13]}"
}
}
if "_grokparsefailure" in [tags] {
drop{}
# ruby { code => "event.cancel" }
}
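# Logstash-side alternative to the Filebeat fingerprint processor shown at the top of this file
# (a sketch; the source field names are placeholders):
# fingerprint {
#   source => ["hostname", "time_local", "request"]
#   concatenate_sources => true
#   method => "SHA256"
#   key => "logstash"
#   target => "[@metadata][_id]"
# }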
}
output {
stdout {
codec => rubydebug { # pretty-print the full event structure
metadata => true # include @metadata fields in the output, useful for debugging
}
}
stdout { codec => dots } # print one dot per event to the console (for testing throughput)
# https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html#plugins-outputs-elasticsearch-pipeline
elasticsearch {
action => "%{[@metadata][action]}"
document_id => "%{[@metadata][_id]}" # use the document ID supplied by Filebeat for deduplication (unless deduplication is needed, prefer letting Elasticsearch auto-generate IDs); a Logstash-side fingerprint alternative is sketched at the end of the filter section
hosts => ["example.com"]
index => "index_name"
protocol => "http" # 新插件支持三种协议 node、http、transport
workers => 5 #
# ilm_rollover_alias => "myinapplications_esb" # write alias used when rollover occurs
# ilm_pattern => "000001" # combined with ilm_rollover_alias to form the index name: myinapplications_esb-000001
# ilm_policy => "applications_log" # ILM policy to apply
template_name => "applications_log" # name of the index template (as stored inside Elasticsearch)
user => "elasticsearch" #
password => "xxxxxx" #
# manage_template => false #
# template_overwrite => true #
# template => "/path/tmpl.json" #
pipeline => "...." # ingest pipeline to execute for the events
proxy => "...." # address of the forwarding HTTP proxy
# When index lifecycle management is enabled for the Beats or Logstash Elasticsearch output, a default ILM policy is configured automatically
}
file {
path => "/path/%{+yyyy/MM/dd/HH}/%{host}.log.gz"
# By default the file output writes the entire event as JSON,
# which may not be what most users expect if the goal is simply to keep the original log line; use a line codec with format => "%{req_field}" instead
codec => line { format => "%{req_field}" }
# gzip => true
}
}