Flink 简单统计参数代码

本文介绍如何使用 Apache Flink 进行基本的数据统计操作。通过示例代码展示如何计算数据流中的平均值、最大值和最小值，适用于实时数据处理场景。
摘要由CSDN通过智能技术生成
package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.collections.map.HashedMap;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.Properties;

/**
 *  计算nginx到 flume1到flume2的离线时间统计
 */
public class FlumeTime {
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //todo 获取kafka的配置属性
        args = new String[]{"--input-topic", "wxgz_dianyou_topic", "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc1"};

        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();

        //todo 指定偏移量消费
        Map<KafkaTopicPartition, Long> offsets = new HashedMap();
        offsets.put(new KafkaTopicPartition("dianyou_wxgz2", 0), 17578573L);


//        //todo 指定输入数据为kafka topic
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<String>(
//                        "wxgz_dianyou_topic",
                        "dianyou_wxgz",
//                "dianyou_filter",
                        new SimpleStringSchema(),
//                        pros).setStartFromSpecificOffsets(offsets)
//                        pros).setStartFromEarliest()
                        pros).setStartFromLatest()

        ).setParallelism(6);
        //todo 拿到字段统计
        DataStream<JSONObject> logDstream = kafkaDstream.map(new MapFunction<String, JSONObject>() {
            /**
             * Reduces one raw JSON log line to the fields the downstream statistics read.
             *
             * <p>Copies {@code deviceId}, {@code flume1Time} and {@code flume2Time} as-is, converts
             * {@code urlTimestamp} to a long after removing every '.' from its string form, and
             * tags the record with the constant {@code id} value "aa".
             *
             * @param value a JSON-encoded log record
             * @return a new JSON object holding only the projected fields
             * @throws Exception if parsing the record or its timestamp fails
             */
            @Override
            public JSONObject map(String value) throws Exception {
                final JSONObject rawLog = JSON.parseObject(value);
                final JSONObject slimLog = new JSONObject();

                slimLog.put("deviceId", rawLog.getString("deviceId"));
                slimLog.put("flume1Time", rawLog.getLong("flume1Time"));
                slimLog.put("flume2Time", rawLog.getLong("flume2Time"));

                // urlTimestamp is read as text; any '.' is removed before it is parsed as a long.
                final String dottedTimestamp = rawLog.getString("urlTimestamp");
                final String undottedTimestamp = dottedTimestamp.replace(".", "");
                slimLog.put("urlTimestamp", Long.parseLong(undottedTimestamp));

                slimLog.put("id", "aa");
                return slimLog;
            }
        }).setParallelism(6);

        //todo 做统计
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            /**
             * Extracts the grouping key for a record.
             *
             * <p>The key is the record's {@code id} field rather than its {@code deviceId}; the
             * upstream mapper sets {@code id} to the same constant on every record.
             *
             * @param value the projected log record
             * @return the string stored under {@code id}
             */
            @Override
            public String getKey(JSONObject value) throws Exception {
                final String groupingKey = value.getString("id");
                return groupingKey;
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {
            private transient ValueState<JSONObject> valueState;

            /**
             * Initialises the function and obtains the state handle named "valueState",
             * which holds a {@link JSONObject}.
             *
             * @param parameters the configuration passed through to the superclass
             * @throws Exception if the superclass initialisation or the state lookup fails
             */
            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);

                final ValueStateDescriptor<JSONObject> stateDescriptor =
                        new ValueStateDescriptor<>("valueState", JSONObject.class);
                valueState = getRuntimeContext().getState(stateDescriptor);
            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {

                String deviceId = log.getString("deviceId");
                Long flume1Time = log.getLong("flume1Time");
                Long flume2Time = log.getLong("flume2Time");
                Long urlTimestamp = log.getLong("urlTimestamp");


                //todo 统计storm 各个节点的

                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        JSONObject countvalue = new JSONObject();

                        countvalue.put("flink_count", 1);
    
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值