Logstash's s3 output was writing the literal text %{host} %{message} instead of the event data; setting codec => json_lines made the events write to the S3 files correctly.
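The mechanism, as I understand it: with no codec configured, the s3 output falls back to the plain codec, which serializes each event via event.to_s, roughly the template "%{+yyyy-MM-dd'T'HH:mm:ss.SSSZ} %{host} %{message}". Logstash's sprintf leaves a %{field} reference literal when the field is absent, and the pipeline below removes both host and message, so the placeholders land verbatim in the S3 objects. A before/after sketch (field values illustrative):

# plain codec, host/message removed upstream -> literal template text in the file:
2024-05-01T08:30:00.000Z %{host} %{message}
# codec => json_lines -> one JSON document per event, one event per line:
{"dt":"2024-05-01","hour":"08","RequestPath":"/api/login","datetime":"2024-05-01 08:30:00"}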
input {
  kafka {
    bootstrap_servers => ["a:9092,b:9092"]
    topics => ["demo"]
    group_id => "test_bigdata6"
    consumer_threads => 6
    decorate_events => true
    auto_offset_reset => "latest"
    #auto_offset_reset => "earliest"
    codec => "json"
    max_poll_records => "10000"
    poll_timeout_ms => 1000
    request_timeout_ms => "40000"
    fetch_max_wait_ms => "500"
    fetch_min_bytes => "1"
    retry_backoff_ms => "100"
    heartbeat_interval_ms => "3000"
    type => "demo"
  }
}
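# Notes on the consumer settings above (my annotations, not from the original):
# - decorate_events => true stores Kafka metadata (topic, partition, offset, key)
#   under [@metadata][kafka] for use in filters and outputs.
# - consumer_threads is best kept equal to (or a divisor of) the topic's partition
#   count; threads beyond the partition count receive nothing.
# - max_poll_records, fetch_max_wait_ms and fetch_min_bytes trade throughput
#   against latency: larger polls mean fewer, bigger batches.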
filter {
  json {
    source => "message"
  }
  date {
    match => ["StartUTC", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
    target => "@timestamp"
  }
  ruby {
    code => "event.set('dt', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d'))"
  }
  ruby {
    code => "event.set('datetime', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))"
  }
  ruby {
    code => "event.set('hour', (event.get('@timestamp').time.localtime).strftime('%H'))"
  }
  ruby {
    code => "event.set('utc', (event.get('@timestamp').time.utc).strftime('%Y-%m-%d %H:%M:%S'))"
  }
}
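# Worked example (illustrative, assuming the machine's local zone is UTC+8):
# StartUTC "2024-05-01 08:30:00" sets @timestamp to 2024-05-01T08:30:00Z, so the
# ruby filters above yield dt => "2024-05-01", datetime => "2024-05-01 16:30:00",
# hour => "16", utc => "2024-05-01 08:30:00".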
filter {
  mutate {
    remove_field => ["startlocal", "startutc", "ServiceURL", "message", "ecs",
                     "agent", "input", "event", "tags", "log", "host", "fields"]
  }
  if [RequestPath] == "/ping" {
    drop {}
  }
}
output {
  #stdout { codec => rubydebug }
  s3 {
    region => "ap-southeast-1"
    bucket => "demo-bucket"    # S3 bucket names may not contain underscores
    prefix => "log14/dt=%{+YYYY}-%{+MM}-%{+dd}/hour=%{+HH}"
    codec => json_lines
  }
}
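Two things worth knowing about this output, to the best of my understanding: the %{+...} date references in prefix are formatted from @timestamp in UTC, so the dt=/hour= partitions in the object key follow UTC even though the dt/hour fields above use local time; and the plugin appends its own file name after the prefix, so keys come out something like (exact file-name pattern varies by plugin version):

log14/dt=2024-05-01/hour=08/ls.s3.<uuid>.2024-05-01T08.30.part0.txt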
DEMO2
input {
  logservice {
    endpoint => "ap-southeast-1.log.aliyuncs.com"
    access_id => "xx"
    access_key => "xx"
    project => "raw-logs-sg-gameserver"
    logstore => "xzj-game-logs"
    consumer_group => "yeeha-prod-prod3"
    consumer_name => "ztest"
    #position => "end"
    position => "begin"
    checkpoint_second => 30
    include_meta => true
    consumer_name_with_ip => false
  }
}
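# Annotations (mine, hedged) on the logservice consumer options:
# - consumer_group/consumer_name identify this consumer for server-side
#   checkpointing; position => "begin" only matters when the group has no saved
#   checkpoint, in which case it starts from the earliest data ("end" = newest).
# - checkpoint_second => 30 persists the consume position every 30s, bounding
#   how much data is re-read after a restart.
# - include_meta => true attaches Log Service metadata (e.g. __source__,
#   __topic__) to each event.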
filter {
  json {
    source => "message"
  }
  ruby {
    code => "s = event.get('__raw_log__') || event.get('content'); s = s.scan(/{.*}/); s = s[0]; event.set('content', s);"
  }
  #mutate {
  #  gsub => ["content", "'", '"']
  #}
  json {
    source => "content"
  }
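  # What the ruby + second json pair does (example values illustrative):
  #   __raw_log__ = '2024-05-01 08:30:00 INFO {"uid":123,"zone":"sg"} done'
  # scan(/{.*}/) is greedy, grabbing from the first '{' to the last '}' on the
  # line => '{"uid":123,"zone":"sg"}', which json { source => "content" } then
  # parses into top-level fields. Caveat: a line containing several JSON objects
  # or a stray trailing '}' would be captured as one (possibly invalid) span.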
  date {
    match => ["dtEventTime", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
    target => "@timestamp"
  }
  ruby {
    code => "event.set('dt', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d'))"
  }
  ruby {
    code => "event.set('datetime', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))"
  }
  ruby {
    code => "event.set('hour', (event.get('@timestamp').time.localtime).strftime('%H'))"
  }
  ruby {
    code => "event.set('utc', (event.get('@timestamp').time.utc).strftime('%Y-%m-%d %H:%M:%S'))"
  }
}
#filter {
#  mutate {
#    remove_field => ["__raw_log__"]
#  }
#}
output {
  #stdout { codec => rubydebug }
  s3 {
    region => "ap-southeast-1"
    bucket => "davion-prod-gameplus-bigdata"
    codec => "json_lines"
    prefix => "log/name=%{[name]}/dt=%{+YYYY}-%{+MM}-%{+dd}/hour=%{+HH}"
    size_file => 52428800
    #size_file => 2048
  }
}
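A few notes, hedged: size_file => 52428800 rotates the S3 object every 50 MiB (50 * 1024 * 1024 bytes), and the plugin also rotates on a timer (the time_file setting) independently of size. Since prefix interpolates %{[name]}, any event that lacks a name field is written under the literal key segment name=%{[name]}, which is the same sprintf behavior that caused the problem described at the top.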
DEMO3
input {
  kafka {
    bootstrap_servers => ["xxx:9092"]
    topics => ["xzj-xxx-sls"]
    auto_offset_reset => "latest"
    #auto_offset_reset => "earliest"
    group_id => "logstash_t1"
    consumer_threads => 3
    max_partition_fetch_bytes => 10048576
    max_poll_records => 20000
  }
}
filter {
  json {
    source => "message"
  }
  date {
    match => ["dtEventTime", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
    target => "@timestamp"
  }
  ruby {
    code => "
      event.set('dt', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d'))
      event.set('datetime', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))
      event.set('hour', (event.get('@timestamp').time.localtime).strftime('%H'))
    "
  }
}
filter {
  mutate {
    remove_field => ["message"]
  }
}
Recommended Logstash tuning: set pipeline.batch.size to 2000 and increase the JVM heap.
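A minimal sketch of where those two knobs live (values illustrative, tune to your hardware):

# logstash.yml
pipeline.batch.size: 2000   # events per worker per batch (default 125)
pipeline.workers: 6         # defaults to the number of CPU cores

# jvm.options -- set min and max heap equal to avoid resize pauses
-Xms4g
-Xmx4g

Larger batches amortize per-batch overhead in filters and outputs, but every in-flight batch is held in memory, which is why the heap has to grow along with batch size and worker count.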