logstash过滤器filter grok多种日志匹配使用心得

例:

 #A=1 B= 2 C=3

 #A=1

filter {
  # Parse two line shapes from the "message" field:
  #   "#A=1 B= 2 C=3"  -> fields A, B, C
  #   "#A=1"           -> field A only
  grok {
    # "match" accepts an array of patterns for one field. They are tried in
    # order and grok stops at the first hit (break_on_match defaults to true),
    # so the more specific three-field pattern must come first.
    # Splitting the alternatives also avoids declaring field A twice inside a
    # single regular expression.
    # %{SPACE} (\s*) after each "=" tolerates input such as "B= 2"; without it
    # %{NUMBER} cannot match and the line falls through to the A-only pattern.
    match => {
      "message" => [
        "#A=%{SPACE}%{NUMBER:A}%{SPACE}B=%{SPACE}%{NUMBER:B}%{SPACE}C=%{SPACE}%{NUMBER:C}",
        "#A=%{SPACE}%{NUMBER:A}"
      ]
    }
  }
}


如果日志有多种格式,可以用正则中的"或"(|)把各种情况写进同一个表达式来匹配,也可以给 match 传入由多个模式组成的数组;但字段过多时会报错,目前仍在查找解决办法。

自带正则匹配表


 
# One or more letters, digits, dots, underscores or hyphens.
USERNAME [a-zA-Z0-9._-]+
 
# Alias for USERNAME.
USER %{USERNAME}
 
# Local part of an e-mail address: must start with a letter.
# NOTE(review): inside the class, "+-=" is a range from '+' to '=', so it also
# admits ',', '/', ';' and '<' - confirm that is intended.
EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
 
# EMAILLOCALPART, '@', then HOSTNAME.
EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
 
# Integer with an optional leading sign.
INT (?:[+-]?(?:[0-9]+))
 
# Optionally signed decimal number: digits with an optional fractional part, or
# a bare fraction such as ".5". The lookbehind rejects a match that would start
# right after a digit, '.', '+' or '-'; the atomic group prevents backtracking
# into a shorter number.
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
 
# Non-capturing wrapper around BASE10NUM.
NUMBER (?:%{BASE10NUM})
 
# Hexadecimal integer with optional sign and optional "0x" prefix; must not be
# preceded by another hex digit.
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
 
# Hexadecimal number with an optional fractional part, delimited by word
# boundaries.
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
   
 
# Integer without a leading zero (first digit 1-9), so "0" does not match.
POSINT \b(?:[1-9][0-9]*)\b
 
# One or more digits delimited by word boundaries.
NONNEGINT \b(?:[0-9]+)\b
 
# A single run of word characters.
WORD \b\w+\b
 
# One or more non-whitespace characters.
NOTSPACE \S+
 
# Zero or more whitespace characters - also matches the empty string.
SPACE \s*
 
# Any characters, as few as possible (lazy).
DATA .*?
 
# Any characters, as many as possible (greedy).
GREEDYDATA .*
 
# A string quoted with double quotes, single quotes or backticks, allowing
# backslash escapes inside; the empty forms "", '' and `` are accepted. The
# opening quote must not itself be preceded by a backslash.
QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
 
# Hex digits grouped 8-4-4-4-12 and separated by hyphens.
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
 
# URN, allowing use of RFC 2141 section 2.3 reserved characters
 
# "urn:", a namespace id of 1-32 alphanumerics/hyphens (first one
# alphanumeric), ':', then one or more allowed characters or %XX escapes.
URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
   
 
# Networking
 
# Any of the three MAC address notations defined below.
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
 
# Three dot-separated groups of four hex digits.
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
 
# Six hyphen-separated pairs of hex digits.
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
 
# Six colon-separated pairs of hex digits.
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
 
# IPv6 address. Each top-level alternative handles a fixed number of leading
# "hhhh:" groups (7 down to 0) followed by the remaining groups, a "::"
# compression, or an embedded dotted-quad IPv4 tail. The trailing (%.+)?
# accepts an optional '%' followed by any characters (presumably a zone/scope
# id - confirm).
IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
 
# Dotted-quad IPv4 address. Each octet is 0-255 (leading zeros such as "01"
# are accepted); the address must not be directly preceded or followed by a
# digit.
IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
 
# IPv6 is tried before IPv4.
IP (?:%{IPV6}|%{IPV4})
 
# Dot-separated labels of 1-63 characters, each starting with an alphanumeric
# and continuing with alphanumerics or hyphens; an optional trailing dot is
# allowed.
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
 
# IP address is tried first, then host name.
IPORHOST (?:%{IP}|%{HOSTNAME})
 
# Host or IP, ':', then a port number matched by POSINT.
HOSTPORT %{IPORHOST}:%{POSINT}
   
 
# paths
 
# Unix-style path is tried first, then Windows-style.
PATH (?:%{UNIXPATH}|%{WINPATH})
 
# One or more '/' characters, each followed by any number of runs of allowed
# characters or backslash-escaped characters.
UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
 
# Terminal device under /dev/: "pts" or "tty" (optionally followed by 'p' or
# 'q'), optional word characters, an optional '/', then digits.
TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
 
# Letters followed by ':' (a drive), or a single backslash, followed by one or
# more backslash-led segments that contain no '\', '?' or '*'.
WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
 
# URI scheme: a letter followed by one or more of letters, digits, '+', '-',
# '.'.
# NOTE(review): the nested quantifier "(...+)+" can backtrack heavily on long
# non-matching input - worth confirming on hot paths.
URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
 
# Host or IP with an optional ":port"; the port is captured into field "port".
URIHOST %{IPORHOST}(?::%{POSINT:port})?
 
# uripath comes loosely from RFC1738, but mostly from what Firefox
 
# doesn't turn into %XX
 
# One or more '/'-led segments made of the listed characters (a segment may be
# empty).
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
 
# Earlier, stricter key=value form of URIPARAM, kept commented out.
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
 
# Query string: '?' followed by any number of the listed characters.
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
 
# Path with an optional query string.
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
 
# scheme "://", optional "user[:password]@", optional host[:port], optional
# path and query string.
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
   
 
# Months: January, Feb, 3, 03, 12, December
 
# Month names only (numeric months are handled by MONTHNUM / MONTHNUM2). The
# first letter may be upper or lower case; besides the English names and
# three-letter abbreviations, spellings such as "Januar", "März", "Mai",
# "Juni", "Okt" and "Dez" are accepted.
MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
 
# Month number 1-12 with an optional leading zero.
MONTHNUM (?:0?[1-9]|1[0-2])
 
# Month number 01-12, always two digits.
MONTHNUM2 (?:0[1-9]|1[0-2])
 
# Day of month 1-31; single-digit days may have a leading zero.
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
   
 
# Days: Monday, Tue, Thu, etc...
 
# English day names, full or three-letter form; the match is case-sensitive
# with a capital first letter.
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
   
 
# Years?
 
# Two or four digits.
YEAR (?>\d\d){1,2}
 
# Hour 0-23 with an optional leading zero.
HOUR (?:2[0123]|[01]?[0-9])
 
# Minute 00-59, always two digits.
MINUTE (?:[0-5][0-9])
 
# '60' is a leap second in most time standards and thus is valid.
 
# Seconds 0-60 with an optional leading zero, optionally followed by ':', '.'
# or ',' and fractional digits.
SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
 
# HH:MM:SS - the seconds part is required here.
# NOTE(review): "(?!<[0-9])" is a negative lookahead for a literal '<' followed
# by a digit, not a lookbehind; "(?<![0-9])" looks like what was intended -
# confirm against the shipped pattern file before changing it.
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
 
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
 
# Month, day, year separated by '/' or '-'.
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
 
# Day, month, year separated by '.', '/' or '-'.
DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
 
# 'Z', or a sign followed by hours and minutes with an optional ':' between
# them.
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
 
# SECOND or a literal "60" (SECOND already accepts 60).
ISO8601_SECOND (?:%{SECOND}|60)
 
# year-month-day, 'T' or a space, hour and minute with an optional ':', then
# optional seconds and an optional time zone.
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
 
# US-style date is tried first, then EU-style.
DATE %{DATE_US}|%{DATE_EU}
 
# DATE, a '-' or a space, then TIME.
DATESTAMP %{DATE}[- ]%{TIME}
 
# "UTC", or a three-letter zone made of one of A/P/M/C/E, then S or D, then T.
TZ (?:[APMCE][SD]T|UTC)
 
# Day-name month day year time zone, separated by single spaces.
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
 
# Day-name, comma, day month year time, then an ISO 8601 style offset.
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
 
# Same parts as DATESTAMP_RFC822 but with the year last.
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
 
# Year, two-digit month, day, hour, minute and second run together with no
# separators.
DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
   
 
# Syslog Dates: Month Day HH:MM:SS
 
# Month name, one or more spaces, day of month, one space, then TIME.
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
 
# One or more characters in the range 0x21-0x7e, excluding '[' (0x5b) and
# ']' (0x5d).
PROG [\x21-\x5a\x5c\x5e-\x7e]+
 
# Program name captured into "program", with an optional "[pid]" suffix whose
# number is captured into "pid".
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
 
# Alias for IPORHOST.
SYSLOGHOST %{IPORHOST}
 
# "<facility.priority>" with both numbers captured into fields of those names.
# NOTE(review): the '.' between them is unescaped, so it matches any single
# character, not only a literal dot.
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
 
# day/month-name/year:time, a space, then a signed or unsigned integer
# (presumably the UTC offset such as +0000 - confirm).
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
   
 
# Shortcuts
 
# Short alias for QUOTEDSTRING.
QS %{QUOTEDSTRING}
   
 
# Log formats
 
# Syslog line prefix: the timestamp is captured into "timestamp", an optional
# facility block follows, the host is captured into "logsource", then
# SYSLOGPROG (which sets "program" and optionally "pid") and a trailing ':'.
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
   
 
# Log Levels
 
# Common level names in lower-case, capitalized and upper-case spellings.
# NOTE(review): optional letters such as "n?" in "[Ww]arn?(?:ing)?" mean short
# or misspelled forms like "War", "Er" and "Cri" also match - confirm that is
# acceptable before relying on this pattern for validation.
LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)


Grok 过滤器是 Logstash 中一种用于解析和匹配日志消息的工具。它使用正则表达式来匹配文本模式,并将匹配到的值存储在新字段中。然而,当模式不匹配时,Grok 过滤器可能会遇到性能问题。为了解决这个问题,可以考虑改用基于分隔符的 Dissect 过滤器,它比基于正则表达式的过滤器更容易编写和使用。不幸的是,目前还没有针对 Dissect 的(调试)应用程序。要使用 Grok 过滤器,可以在配置文件中指定字段名称和匹配模式,如:

```
filter { grok { match => [ "message", "%{USERNAME:user}" ] } }
```

这将从 "message" 字段中提取匹配的用户名,并将其存储在新的 "user" 字段中。另一方面,如果应用程序的日志记录格式发生变化,直接解析日志的难度可能会增加。在这种情况下,可以考虑使用 Logstash 的 Dissect 过滤器,并进行相应的配置。一个可能的配置示例如下:

```
filter { dissect { mapping => { "message" => ... } } mutate { strip => [ "log", "class" ] } }
```

这个配置使用 Dissect 过滤器根据指定的映射将日志消息拆分成多个字段,再通过 mutate 过滤器的 strip 选项去除 "log" 和 "class" 字段值首尾的空白字符(strip 并不会删除字段)。综上所述,Grok 过滤器是 Logstash 中用于解析和匹配日志消息的一种工具,而 Dissect 过滤器则是一种更简单易用的基于分隔符的过滤器。

#### 引用

- [Logstash:如何使用 Logstash Grok 过滤器提取模式](https://blog.csdn.net/UbuntuTouch/article/details/107512971)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值