环境
- centos 7.2
- JDK 11
- logstash 7.4 rpm
logstash rpm安装,参见Logstash RPM安装
语法
grok的正则,是在通用正则表达式的基础上,给正则表达式加上了名称(变量名),这样就可以在其他的正则里通过名称调用了;
通用的正则,参见正则表达式
grok调用正则的语法为
%{PATTERN_NAME}
%{PATTERN_NAME:OUTPUT_FIELD_NAME}
PATTERN_NAME 匹配到的信息,会放在输出字段 OUTPUT_FIELD_NAME 里;
grok内置的正则在
https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
可以看到grok正则定义的格式
USERNAME [a-zA-Z0-9._-]+
USER %{USERNAME}
...
POSINT \b(?:[1-9][0-9]*)\b
...
IPORHOST (?:%{IP}|%{HOSTNAME})
HOSTPORT %{IPORHOST}:%{POSINT}
...
URIHOST %{IPORHOST}(?::%{POSINT:port})?
格式为: PATTERN_NAME regular_expression,其中 regular_expression 可以调用其他的正则
实例
$ cd /var/tmp
$ vi logstash-filter.conf
input {
tcp {
port => 5000
type => test
}
}
filter {
if [type] == "test" {
grok {
match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
add_field => [ "received_at", "%{@timestamp}" ]
add_field => [ "received_from", "%{host}" ]
}
}
}
output {
stdout { codec => rubydebug }
}
# 命令行启动
$ /usr/share/logstash/bin/logstash "--path.settings" "/etc/logstash" -f logstash-filter.conf
默认接收到的信息是放在字段 'message' 里的;
grok的syslog正则定义为
DATA .*?
# Syslog Dates: Month Day HH:MM:SS
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
PROG [\x21-\x5a\x5c\x5e-\x7e]+
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
SYSLOGHOST %{IPORHOST}
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
打开一个新终端输入
$ telnet localhost 5000
Dec 23 12:11:43 louis postfix/smtpd[31499]: connect from unknown[95.75.93.154]
Dec 23 14:42:56 louis named[16000]: client 199.48.164.7#64817: query (cache) 'amsterdamboothuren.com/MX/IN' denied
Dec 23 14:30:01 louis CRON[619]: (www-data) CMD (php /usr/share/cacti/site/poller.php >/dev/null 2>/var/log/cacti/poller-error.log)
Dec 22 18:28:06 louis rsyslogd: [origin software="rsyslogd" swVersion="4.2.0" x-pid="2253" x-info="http://www.rsyslog.com"] rsyslogd was HUPed, type 'lightweight'.
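在原终端可以看到输出(telnet 发送的每一行以 \r\n 结尾,所以 message 和 syslog_message 的末尾都带有 \r;最后一条日志里没有 [pid] 部分,而正则中的 (?:\[%{POSINT:syslog_pid}\])? 是可选的,因此该条输出没有 syslog_pid 字段)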
{
"syslog_program" => "postfix/smtpd",
"message" => "Dec 23 12:11:43 louis postfix/smtpd[31499]: connect from unknown[95.75.93.154]\r",
"@version" => "1",
"syslog_message" => "connect from unknown[95.75.93.154]\r",
"host" => "localhost",
"syslog_timestamp" => "Dec 23 12:11:43",
"received_from" => "localhost",
"port" => 34971,
"syslog_hostname" => "louis",
"@timestamp" => 2019-10-17T07:55:09.234Z,
"syslog_pid" => "31499",
"received_at" => "2019-10-17T07:55:09.234Z",
"type" => "test"
}
{
"syslog_program" => "named",
"message" => "Dec 23 14:42:56 louis named[16000]: client 199.48.164.7#64817: query (cache) 'amsterdamboothuren.com/MX/IN' denied\r",
"@version" => "1",
"syslog_message" => "client 199.48.164.7#64817: query (cache) 'amsterdamboothuren.com/MX/IN' denied\r",
"host" => "localhost",
"syslog_timestamp" => "Dec 23 14:42:56",
"received_from" => "localhost",
"port" => 34971,
"syslog_hostname" => "louis",
"@timestamp" => 2019-10-17T07:55:20.447Z,
"syslog_pid" => "16000",
"received_at" => "2019-10-17T07:55:20.447Z",
"type" => "test"
}
{
"syslog_program" => "CRON",
"message" => "Dec 23 14:30:01 louis CRON[619]: (www-data) CMD (php /usr/share/cacti/site/poller.php >/dev/null 2>/var/log/cacti/poller-error.log)\r",
"@version" => "1",
"syslog_message" => "(www-data) CMD (php /usr/share/cacti/site/poller.php >/dev/null 2>/var/log/cacti/poller-error.log)\r",
"host" => "localhost",
"syslog_timestamp" => "Dec 23 14:30:01",
"received_from" => "localhost",
"port" => 34971,
"syslog_hostname" => "louis",
"@timestamp" => 2019-10-17T07:55:27.920Z,
"syslog_pid" => "619",
"received_at" => "2019-10-17T07:55:27.920Z",
"type" => "test"
}
{
"syslog_program" => "rsyslogd",
"message" => "Dec 22 18:28:06 louis rsyslogd: [origin software=\"rsyslogd\" swVersion=\"4.2.0\" x-pid=\"2253\" x-info=\"http://www.rsyslog.com\"] rsyslogd was HUPed, type 'lightweight'.\r",
"@version" => "1",
"syslog_message" => "[origin software=\"rsyslogd\" swVersion=\"4.2.0\" x-pid=\"2253\" x-info=\"http://www.rsyslog.com\"] rsyslogd was HUPed, type 'lightweight'.\r",
"host" => "localhost",
"syslog_timestamp" => "Dec 22 18:28:06",
"received_from" => "localhost",
"port" => 34971,
"syslog_hostname" => "louis",
"@timestamp" => 2019-10-17T07:55:35.254Z,
"received_at" => "2019-10-17T07:55:35.254Z",
"type" => "test"
}