一、Filebeat
配置
# Log inputs for this host. Several application services may run on one
# machine while a single Filebeat collects all of their logs: each service
# gets its own `- type: log` entry and sets `fields.source` to identify
# which service an event came from.
#
# NOTE(review): both entries below harvest the same glob
# (/opt/donaldy/user/logs/*.log). The Filebeat documentation warns against
# defining one file in more than one input; confirm this behaves as intended
# on the deployed version.
# NOTE(review): from Filebeat 6.3 this key is named `filebeat.inputs`;
# confirm the deployed version before renaming.
filebeat.prospectors:
  # Input 1: ERROR / WARN events of the "user" service.
  - type: log
    enabled: true
    # Keep only events whose first line starts with [ERROR or [WARN.
    include_lines: ['^\[ERROR', '^\[WARN']
    # Lines that do NOT start with '[' are appended to the preceding line
    # that does, so continuation lines stay attached to their log record.
    multiline:
      pattern: '^\['
      negate: true
      match: after
    paths:
      - /opt/donaldy/user/logs/*.log
    # Skip any file whose path matches this pattern.
    exclude_files: [".*error.*"]
    ignore_older: 24h
    clean_inactive: 36h
    fields:
      # Name of the application this input belongs to.
      source: user

  # Input 2: [USER_INFO] events of the "user" service.
  - type: log
    enabled: true
    # Keep events containing [USER_INFO] and drop ERROR / WARN events.
    include_lines: ['.*\[USER_INFO\].*']
    exclude_lines: ['^\[ERROR', '^\[WARN']
    multiline:
      pattern: '^\['
      negate: true
      match: after
    paths:
      - /opt/donaldy/user/logs/*.log
    exclude_files: [".*error.*"]
    ignore_older: 24h
    clean_inactive: 36h
    fields:
      source: user
      # Read by the Logstash pipeline as [fields][type] to route these
      # events to the user-log grok pattern and index.
      type: user
# Where Filebeat looks for module configuration files.
filebeat.config.modules:
  # Glob pattern for configuration loading.
  path: ${path.config}/modules.d/*.yml
  # Set to true to enable config reloading.
  reload.enabled: false
# Ship collected events to Logstash.
output.logstash:
  # The Logstash hosts; the port matches the beats input port (5044) of the
  # Logstash pipeline.
  hosts: ["127.0.0.1:5044"]
二、Logstash
配置
# Receive events from Beats shippers.
input {
  beats {
    # Listening port for incoming Beats connections.
    port => 5044
  }
}
# Parse the raw log line in "message" into structured fields.
#   * Events tagged [fields][type] == "user" are parsed as user-info records:
#     a common header followed by fourteen parenthesised values.
#   * Every other event is parsed as a generic record (header + free text),
#     its [fields][source] is promoted to "application_name", and the
#     "fields" object is dropped.
filter {
  if [fields][type] == "user" {
    grok {
      # nanoTime uses the ":int" conversion suffix; grok documents only
      # "int" and "float" as supported conversions.
      match => { "message" => ["\[%{LOGLEVEL:logLevel}\]\[%{NUMBER:nanoTime:int}\] %{TIMESTAMP_ISO8601:time} %{DATA:method} - \[%{WORD:log_type}\]\ : \((?<phone_number>([\s\S]*))\), \((?<nick>([\s\S]*))\), \((?<account_id>([\s\S]*))\), \((?<account_name>([\s\S]*))\), \((?<action>([\s\S]*))\), \((?<ip>([\s\S]*))\), \((?<brand>([\s\S]*))\), \((?<model>([\s\S]*))\), \((?<network>([\s\S]*))\), \((?<isp>([\s\S]*))\), \((?<os>([\s\S]*))\), \((?<client_version>([\s\S]*))\), \((?<device>([\s\S]*))\), \((?<extra>([\s\S]*))\)"] }
    }
    # "fields" is intentionally left in place here: the output section
    # still tests [fields][type] to pick the index.
  } else {
    grok {
      # Header plus the remainder of the line captured as "desc".
      match => { "message" => ["\[%{LOGLEVEL:logLevel}\]\[%{NUMBER:nanoTime:int}\] %{TIMESTAMP_ISO8601:time} (?<desc>.*)"] }
    }
    mutate {
      # Keep the originating service name at the top level of the event,
      # then discard the rest of the Filebeat-supplied "fields" object.
      rename => [ "[fields][source]" , "application_name" ]
      remove_field => [ "fields" ]
    }
  }
}
# Index events into Elasticsearch, one index per calendar month.
#   * [fields][type] == "user"  -> user_log_index_<YYYY-MM>
#   * anything else             -> user_error_index_<YYYY-MM>
output {
  if [fields][type] == "user" {
    # Parsed user-info records.
    elasticsearch {
      index => "user_log_index_%{+YYYY-MM}"
      hosts => ["192.168.1.22:9200", "192.168.1.23:9200", "192.168.1.24:9200"]
      codec => json
    }
  } else {
    # All remaining events (the filter's generic branch).
    elasticsearch {
      index => "user_error_index_%{+YYYY-MM}"
      hosts => ["192.168.1.22:9200", "192.168.1.23:9200", "192.168.1.24:9200"]
      codec => json
    }
  }
}
参考资料:
https://www.elastic.co/cn/blog/how-to-find-and-remove-duplicate-documents-in-elasticsearch
https://www.elastic.co/cn/blog/logstash-lessons-handling-duplicates