Logstash S3 output writes literal %{host} %{message}

When writing to S3, the output files contain the literal text %{host} %{message} instead of the event data. The likely cause: with no codec configured, the s3 output falls back to the default line codec, whose default format references the host and message fields; since the filter below removes both fields, the placeholders are written out verbatim. Setting codec => json_lines serializes the whole event as JSON, and the files land in S3 correctly.
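
A minimal sketch of the fix in isolation (bucket name and prefix here are placeholders):

output {
   s3 {
      region => "ap-southeast-1"
      bucket => "some-bucket"                               # placeholder
      prefix => "log/dt=%{+YYYY}-%{+MM}-%{+dd}/hour=%{+HH}"
      codec => json_lines                                   # one JSON object per line, instead of the default line codec
   }
}

The full pipeline, reading from Kafka and writing to S3: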

input {
        kafka {
            bootstrap_servers => ["a:9092,b:9092"]
            topics => ["demo"]
            group_id => "test_bigdata6"
            consumer_threads => 6
            decorate_events => true
            auto_offset_reset => "latest"
            #auto_offset_reset => "earliest"
            codec => "json"
            max_poll_records => "10000"
            poll_timeout_ms => 1000
            request_timeout_ms => "40000"
            fetch_max_wait_ms => "500"
            fetch_min_bytes => "1"
            retry_backoff_ms => "100"
            heartbeat_interval_ms => "3000"

            type => "demo"
        }
}


filter {
    json {
        source => "message"
    }

    date {
       match => ["StartUTC", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
       target => "@timestamp"
    }

    ruby {
        code => "event.set('dt', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d'))"
    }
    ruby {
        code => "event.set('datetime', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))"
    }
    ruby {
        code => "event.set('hour', (event.get('@timestamp').time.localtime).strftime('%H'))"
    }
    ruby {
        code => "event.set('utc', (event.get('@timestamp').time.utc).strftime('%Y-%m-%d %H:%M:%S'))"
    }
}
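
To sanity-check the date and ruby filters in isolation, a throwaway pipeline like the sketch below can be run locally with stdout output (the generator input and the sample StartUTC value are made up for testing only):

input {
    generator {
        lines => ['{"StartUTC":"2024-01-02 03:04:05","RequestPath":"/api/demo"}']
        count => 1
        codec => "json"
    }
}
filter {
    date { match => ["StartUTC", "yyyy-MM-dd HH:mm:ss", "ISO8601"] target => "@timestamp" }
    ruby { code => "event.set('dt', event.get('@timestamp').time.localtime.strftime('%Y-%m-%d'))" }
}
output {
    stdout { codec => rubydebug }    # dt should print as 2024-01-02
}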

filter {
    mutate {
        # drop fields that should not end up in the S3 output; removing
        # "host" and "message" here is also what makes the default line
        # codec print the literal %{host} %{message} placeholders
        remove_field => ["startlocal", "startutc", "ServiceURL", "message", "ecs",
                         "agent", "input", "event", "tags", "log", "host", "fields"]
    }

    if [RequestPath] == "/ping" {
        drop {}
    }
}

output {
   #stdout { codec => rubydebug }

   s3 {
      region => "ap-southeast-1"
      bucket => "demo_bucket"
      prefix => "log14/dt=%{+YYYY}-%{+MM}-%{+dd}/hour=%{+HH}"
      codec => json_lines
   }
}
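
Note that the %{+...} patterns in prefix are formatted from @timestamp in UTC, while the dt and hour fields above are computed in local time, so the two can disagree around day/hour boundaries. If the installed logstash-output-s3 version supports field interpolation in prefix, a variant that reuses the computed fields would look like the sketch below (more distinct prefixes also means more temporary files being rotated):

   s3 {
      region => "ap-southeast-1"
      bucket => "demo_bucket"
      prefix => "log14/dt=%{dt}/hour=%{hour}"   # partition by the locally-computed dt / hour fields
      codec => json_lines
   }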

DEMO2

input {
  logservice{
    endpoint => "ap-southeast-1.log.aliyuncs.com"
    access_id => "xx"
    access_key => "xx"
    project => "raw-logs-sg-gameserver"
    logstore => "xzj-game-logs"
    consumer_group => "yeeha-prod-prod3"
    consumer_name => "ztest"
    #position => "end"
    position => "begin"
    checkpoint_second => 30
    include_meta => true
    consumer_name_with_ip => false
  }
}

filter {
    json {
        source => "message"
    }

    ruby {
        # keep only the embedded JSON object ({...}) so the json filter below can parse it
        code => "s = event.get('__raw_log__') || event.get('content'); m = s ? s.scan(/{.*}/) : []; event.set('content', m[0]) if m[0]"
    }

    #mutate {
    #    gsub => ["content","'",'"']
    #}

    json {
        source => "content"
    }

    date {
       match => ["dtEventTime", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
       target => "@timestamp"
    }

    ruby {
        code => "event.set('dt', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d'))"
    }
    ruby {
        code => "event.set('datetime', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))"
    }
    ruby {
        code => "event.set('hour', (event.get('@timestamp').time.localtime).strftime('%H'))"
    }
    ruby {
        code => "event.set('utc', (event.get('@timestamp').time.utc).strftime('%Y-%m-%d %H:%M:%S'))"
    }
}
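
A quick way to check the extraction step above on its own (the sample log line and the generator input are made up for illustration; generator puts each line into the message field, so the sketch reads from there instead of __raw_log__):

input {
    generator {
        lines => ['some prefix {"name":"demo","dtEventTime":"2024-01-02 03:04:05"} some suffix']
        count => 1
    }
}
filter {
    ruby { code => "s = event.get('message'); m = s ? s.scan(/{.*}/) : []; event.set('content', m[0]) if m[0]" }
    json { source => "content" }
}
output {
    stdout { codec => rubydebug }    # name and dtEventTime should show up as top-level fields
}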


#filter {
#    mutate{
#        remove_field => ["__raw_log__"]
#    }
#}


output {
   #stdout { codec => rubydebug }

   s3 {
      region => "ap-southeast-1"
      bucket => "davion-prod-gameplus-bigdata"
      codec => "json_lines"
      prefix => "log/name=%{[name]}/dt=%{+YYYY}-%{+MM}-%{+dd}/hour=%{+HH}"
      size_file => 52428800       # rotate the part file at 50 MB
      #size_file => 2048
   }
}

DEMO3

input {
  kafka {
    bootstrap_servers => ["xxx:9092"]
    topics => ["xzj-xxx-sls"]
    auto_offset_reset => "latest"
    #auto_offset_reset => "earliest"
    group_id => "logstash_t1"
    consumer_threads => 3
    max_partition_fetch_bytes => 10048576
    max_poll_records => 20000
  }
}

filter {
    json {
        source => "message"
    }

    date {
       match => ["dtEventTime", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
       target => "@timestamp"
    }

    ruby {
        code => "
                event.set('dt', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d'))
                event.set('datetime', (event.get('@timestamp').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))
                event.set('hour', (event.get('@timestamp').time.localtime).strftime('%H'))
        "
    }
}

filter {
    mutate{
        remove_field => ["message"]
    }
}

Recommended Logstash tuning: raise the pipeline batch size to 2000 and increase the JVM heap.
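
A sketch of where those settings live (the 4g heap below is only an illustrative value, not from the original post; size it to the host):

# logstash.yml
pipeline.batch.size: 2000

# jvm.options
-Xms4g
-Xmx4g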
