package com.coder.flink.core.aaa_spark;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import java.util.Properties;
/**
 * Count abnormal data from Kafka and write the results to Redis.
 */
public class StormTimeCount {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //todo get the Kafka configuration properties (hard-coded here, overriding any command-line arguments)
        args = new String[]{"--input-topic", "wxgz_dianyou_topic", "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc1"};
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();
        //todo use the Kafka topic as the input source
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<String>(
                "wxgz_dianyou_topic",
                // "dianyou_filter",
                new SimpleStringSchema(),
                // pros).setStartFromEarliest()
                pros).setStartFromLatest()
        ).setParallelism(6);
        //todo parse each record and keep only those carrying the "nginx_storm" marker
        DataStream<JSONObject> logDstream = kafkaDstream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject logJson = JSON.parseObject(value);
                return logJson.containsKey("nginx_storm");
            }
        }).map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                return JSON.parseObject(value);
            }
        });
        // Placeholder sink; the keyed counting and Redis sink steps are sketched below.
        logDstream.print();
        env.execute("StormTimeCount");
    }
}
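The otherwise unused state imports (ValueState, ValueStateDescriptor, KeySelector, RichMapFunction) suggest the truncated part of the listing kept a running count per key in Flink keyed state. Here is a minimal sketch of that step, assuming a hypothetical "appId" field as the key; the real key field is not shown above:

// Keyed running count via ValueState. The "appId" key field is an assumption.
DataStream<JSONObject> countedDstream = logDstream
        .keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
                return value.getString("appId"); // hypothetical key field
            }
        })
        .map(new RichMapFunction<JSONObject, JSONObject>() {
            private transient ValueState<Long> countState;

            @Override
            public void open(Configuration parameters) {
                countState = getRuntimeContext().getState(
                        new ValueStateDescriptor<>("count", Long.class));
            }

            @Override
            public JSONObject map(JSONObject value) throws Exception {
                Long count = countState.value();
                count = (count == null) ? 1L : count + 1L;
                countState.update(count);
                value.put("count", count); // attach the running total to the record
                return value;
            }
        });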
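The Jedis and RichSinkFunction imports point to the job's final step: writing the counts to Redis. A minimal sketch of such a sink follows, assuming a Redis instance at node1.hadoop:6379 and a hash named "nginx_storm:count"; the address, key layout, and field names are assumptions, not values from the original listing:

// Minimal Redis sink sketch. Host, port, and key layout are assumptions.
class RedisCountSink extends RichSinkFunction<JSONObject> {
    private transient JedisPool pool;

    @Override
    public void open(Configuration parameters) {
        JedisPoolConfig config = new JedisPoolConfig();
        config.setMaxTotal(8);
        pool = new JedisPool(config, "node1.hadoop", 6379); // hypothetical Redis address
    }

    @Override
    public void invoke(JSONObject value, Context context) {
        try (Jedis jedis = pool.getResource()) {
            // Store the latest count per key in a Redis hash; field names are assumptions.
            jedis.hset("nginx_storm:count", value.getString("appId"), value.getString("count"));
        }
    }

    @Override
    public void close() {
        if (pool != null) {
            pool.close();
        }
    }
}

Such a sink would replace the print() placeholder in the main listing, e.g. countedDstream.addSink(new RedisCountSink());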