调试正则的工具: http://grokdebug.herokuapp.com/
注意:通过 add_field / add_tag 添加的字段或标签,以及解析(grok)时捕获的字段,其命名一定不能是关键字,如 type
解析例子:
input {
  # Each Beats listener stamps its events with a "myid" field; the filter
  # and output sections branch on that value.

  # Listener whose events are tagged myid=nginx
  beats {
    port      => 5043
    add_field => { "myid" => "nginx" }
  }

  # Listener whose events are tagged myid=java
  beats {
    port      => 5044
    add_field => { "myid" => "java" }
  }
}
filter {
  # ---------------------------------------------------------------------
  # Events from the listener tagged myid=nginx: parse the access-log line.
  # ---------------------------------------------------------------------
  if [myid] == "nginx" {
    grok {
      match => {
        # The leading "domain" group is one of: a bare IP, "[subsite.]site.com",
        # or any other non-space token (captured as "unknown").
        # The dot before "com" is escaped so that only a literal ".com" suffix
        # matches (an unescaped dot would accept e.g. "foo.telecom" as site "tel").
        "message" => "^(?<domain>%{IP:ip}|(?:%{NOTSPACE:subsite}\.)?(?<site>[-a-zA-Z0-9]+?)\.com|%{NOTSPACE:unknown}) %{IPORHOST:dayuip} - (?<user>[a-zA-Z\.\@\-\+_%]+) \[%{HTTPDATE:timestamp}\] \"%{WORD:verb} (?<request_path>(?<biz>\/[^/?]*)%{URIPATH:}?)(?:%{URIPARAM:request_param})? HTTP/%{NUMBER:httpversion}\" %{NUMBER:response} (?:%{NUMBER:bytes}|-) (?:%{BASE10NUM:request_duration}|-) (?:\"(?:%{URI:referrer}|-)\"|%{QS:referrer}) %{QS:agent} \"(?:%{IPORHOST:clientip}(?:[^\"]*)|-)\" %{QS:uidgot} %{QS:uidset} \"(?:[^\" ]* )*(?<upstream>[^ \"]*|-)\"$"
      }
    }

    # Use the access-log timestamp as the event time.
    date {
      locale => "en"
      timezone => "Asia/Shanghai"
      match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
    }

    # Store the numeric fields as numbers rather than strings.
    mutate {
      convert => { "bytes" => "integer" "request_duration" => "float"}
    }
  }

  # ---------------------------------------------------------------------
  # Events from the listener tagged myid=java: parse "[entry]...[/entry]"
  # application log lines.
  # ---------------------------------------------------------------------
  if [myid] == "java" {
    # Files named "*-phplog.log" carry proj/iid/file/type inside the entry itself.
    # The dot before "log" is escaped to match a literal ".log", consistent with
    # the source-path pattern used in the else branch.
    if [source] =~ /.+-phplog\.log/ {
      grok {
        match => {
          "message" => "\[entry\]\[ts\](?<ts>.*)\[/ts\]\[lv\](?<lv>.*)\[/lv\]\[th\](?<th>.*)\[/th\]\[lg\](?<lg>.*)\[/lg\]\[cl\](?<cl>.*)\[/cl\]\[m\](?<m>.*)\[/m\]\[ln\](?<ln>.*)\[/ln\]\[bsid\](?<bsid>.*)\[/bsid\]\[esid\](?<esid>.*)\[/esid\]\[txt\](?<txt>.*)\[/txt\]\[proj\](?<proj>.*)\[/proj\]\[iid\](?<iid>.*)\[/iid\]\[file\](?<file>.*)\[/file\]\[ex\](?<ex>.*)\[/ex\]\[type\](?<logtype>.*)\[/type\]\[/entry\]"
        }
      }
      mutate {
        # Drop fields that are not needed.
        remove_field => ["type","logtype"]
      }
    } else {
      # All other files: the entry has an optional [cmid] section and no
      # proj/iid of its own.
      grok {
        match => {
          "message" => "\[entry\]\[ts\](?<ts>.*)\[/ts\]\[lv\](?<lv>.*)\[/lv\]\[th\](?<th>.*)\[/th\]\[lg\](?<lg>.*)\[/lg\]\[cl\](?<cl>.*)\[/cl\]\[m\](?<m>.*)\[/m\]\[ln\](?<ln>.*)\[/ln\]\[bsid\](?<bsid>.*)\[/bsid\]\[esid\](?<esid>.*)\[/esid\](\[cmid\](?<cmid>.*)\[/cmid\])?\[txt\](?<txt>.*)\[/txt\]\[ex\](?<ex>.*)\[/ex\]\[/entry\]"
        }
      }
      # Derive proj and iid from the log file name ("<proj>-<iid>-<suffix>.log").
      grok {
        match => {
          "source" => "(?<proj>[^/]+)-(?<iid>\w+)-\w+\.log"
        }
      }
    }

    # NOTE(review): in the phplog branch grok already captured a "file" field;
    # this rename presumably replaces it with the shipper's source path - confirm
    # that is intended.
    mutate {
      rename => {
        "source" => "file"
        "offset" => "seq"
      }
    }

    # Drop fields that are not needed. "offset" and "source" were already renamed
    # by the mutate above, so listing them here only acts as a safeguard.
    # NOTE(review): removing "tags" and "message" also discards the failure tags
    # and the raw line when grok or date parsing fails - verify this is acceptable.
    mutate {
      remove_field => ["input_type","count","tags","message","@version","beat","fields","offset","source"]
    }

    # Use the entry's own "ts" value as the event time; the "$" between date and
    # time is a literal character in the pattern.
    # NOTE(review): no timezone is set here (the nginx branch sets Asia/Shanghai),
    # so values without an offset presumably use the host default - confirm.
    date {
      match => ["ts",'yyyy-MM-dd$HH:mm:ss.SSS','yyyy-MM-dd$HH:mm:ss.SSSZ']
    }
  } # endif_javalog
}
output {
  # nginx access-log events: index into Elasticsearch and also POST each event
  # as JSON to the HTTP endpoint below.
  if [myid] == "nginx" {
    elasticsearch {
      hosts => ["192.168.5.201:9200"]
      index => "log-nginx-%{+YYYY.MM.dd}"
    }
    http {
      format => "json"
      http_method => "post"
      # Alternative endpoint (disabled):
      # url => "http://192.168.1.68:8990/api/v1/metrics"
      url => "http://agg.we.com/api/v1/acclog"
    }
  }

  # Application-log events: choose the daily index by originating host.
  # Events from any other host are not written anywhere.
  if [myid] == "java" {
    if [host] == "zy-java1" {
      elasticsearch {
        hosts => ["192.168.5.201:9200"]
        index => "log-java-call-uat-%{+YYYY.MM.dd}"
      }
    } else if [host] == "JAVA1" or [host] == "JAVA2" {
      # JAVA1 and JAVA2 write to the same index, so they share one output.
      elasticsearch {
        hosts => ["192.168.5.201:9200"]
        index => "log-java-call-%{+YYYY.MM.dd}"
      }
    }
  }
}