# Flink Simple Parameter Statistics Code

```java
package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * Offline latency statistics for records flowing from nginx through flume1 to flume2.
 */
public class FlumeTime {

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // TODO: Kafka connection properties (hard-coded here for testing).
        args = new String[]{
                "--input-topic", "wxgz_dianyou_topic",
                "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181",
                "--group.id", "cc1"};
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();

        // TODO: explicit start offsets (only used with setStartFromSpecificOffsets below).
        Map<KafkaTopicPartition, Long> offsets = new HashMap<>();
        offsets.put(new KafkaTopicPartition("dianyou_wxgz2", 0), 17578573L);

        // TODO: read the input from a Kafka topic.
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<String>(
//                "wxgz_dianyou_topic",
                "dianyou_wxgz",
//                "dianyou_filter",
                new SimpleStringSchema(),
                pros)
//                .setStartFromSpecificOffsets(offsets)
//                .setStartFromEarliest()
                .setStartFromLatest()
        ).setParallelism(6);

        // TODO: extract the fields needed for the statistics.
        DataStream<JSONObject> logDstream = kafkaDstream.map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("deviceId", logJson_old.getString("deviceId"));
                logJson_next.put("flume1Time", logJson_old.getLong("flume1Time"));
                logJson_next.put("flume2Time", logJson_old.getLong("flume2Time"));
                // "urlTimestamp" arrives as "seconds.millis"; dropping the dot
                // concatenates the two parts into epoch milliseconds.
                logJson_next.put("urlTimestamp",
                        Long.parseLong(logJson_old.getString("urlTimestamp").replace(".", "")));
                // Constant key so every record shares one piece of keyed state.
                logJson_next.put("id", "aa");
                return logJson_next;
            }
        }).setParallelism(6);

        // TODO: run the stateful aggregation.
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
//                return value.getString("deviceId");
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {

            private transient ValueState<JSONObject> valueState;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext()
                        .getState(new ValueStateDescriptor<>("valueState", JSONObject.class));
            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {
                String deviceId = log.getString("deviceId");
                Long flume1Time = log.getLong("flume1Time");
                Long flume2Time = log.getLong("flume2Time");
                Long urlTimestamp = log.getLong("urlTimestamp");

                // TODO: per-node time statistics.
                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        JSONObject countvalue = new JSONObject();
                        countvalue.put("flink_count", 1);
                        // NOTE: the original post is truncated at this point; the rest
                        // of this method is an assumed minimal completion so the class
                        // compiles: keep a running count plus the two hop latencies.
                        countvalue.put("nginxToFlume1Ms", flume1Time - urlTimestamp);
                        countvalue.put("flume1ToFlume2Ms", flume2Time - flume1Time);
                        valueState.update(countvalue);
                        return countvalue;
                    } else {
                        state.put("flink_count", state.getLongValue("flink_count") + 1);
                        state.put("nginxToFlume1Ms", flume1Time - urlTimestamp);
                        state.put("flume1ToFlume2Ms", flume2Time - flume1Time);
                        valueState.update(state);
                        return state;
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    return log;
                }
            }
        });

        lastLogDstream.print();
        env.execute("FlumeTime");
    }
}
```
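The `urlTimestamp` normalization in the first `map()` only works if the nginx timestamp really arrives as `seconds.milliseconds`. A minimal standalone check of that conversion (the sample value is hypothetical):

```java
public class UrlTimestampCheck {
    public static void main(String[] args) {
        // Hypothetical nginx-style timestamp: seconds.millis since the epoch.
        String urlTimestamp = "1560745353.125";
        // Removing the dot concatenates seconds and millis into epoch
        // milliseconds, exactly as the Flink map() above does.
        long epochMillis = Long.parseLong(urlTimestamp.replace(".", ""));
        System.out.println(epochMillis); // prints 1560745353125
    }
}
```

Note the design choice in the keyed stage: every record is given the constant key `"aa"`, so the whole stream funnels through a single `ValueState` instance. That makes the global `flink_count` trivial, but it also caps the stateful map at an effective parallelism of 1 regardless of the `setParallelism(6)` on the upstream operators.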
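Because the running count lives in keyed state, it resets whenever the job restarts unless checkpointing is enabled. A minimal sketch, assuming the same Flink 1.x setup as above (the 60-second interval is illustrative, not from the original post):

```java
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointedEnv {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Assumed addition: snapshot keyed state (the ValueState above) every
        // 60s so flink_count survives task restarts.
        env.enableCheckpointing(60_000);
        // ... build the same Kafka source / keyBy / RichMapFunction pipeline here ...
    }
}
```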