Real-time statistics of newly registered users with Flink

The job consumes registration events from Kafka, deduplicates users with a Redisson bloom filter, and writes per-dimension counters to Redis; the full source is listed below.

package com.biaodian.flink.taskregister;

import com.biaodian.flink.constant.Constant;
import com.biaodian.flink.dto.RegisterDto;
import com.biaodian.flink.function.register.*;
import com.biaodian.flink.keyby.RegisterKeyBy;
import com.biaodian.flink.keyby.RegisterSiteKeyBy;
import com.biaodian.flink.tool.CountOrProcessingTimeTrigger;
import com.biaodian.flink.tool.PropertiesTool;
import com.biaodian.flink.tool.PropertiesUtil;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Date;
import java.util.Properties;
import java.util.TimeZone;

/**
 * Real-time statistics of newly registered users
 *
 * @author 赵阳光
 * @date 2021-08-16 10:35
 */
public class FlinkRegisterUser {
    public static void main(String[] args) throws Exception {
        // Time zone setup
        TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
        System.out.println("Current time = " + new Date());
        // Base path taken from the classpath root
        String path = FlinkRegisterUser.class.getClassLoader().getResource("").getPath();
        // Checkpoint storage path
        path = Constant.CHECKPOINT_PREFIX + path + Constant.CHECKPOINT_REGISTER_SUFFIX;
        System.out.println("Checkpoint path = " + path);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(Constant.ENABLE_CHECKPOINTING);
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.setMinPauseBetweenCheckpoints(Constant.MIN_PAUSE_BETWEEN_CHECKPOINTS);
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        checkpointConfig.setCheckpointStorage(path);
        // Kafka configuration
        String topicName = PropertiesTool.getProperty("kafka.register.topic");
        Properties properties = PropertiesUtil.getProperties();
        properties.setProperty("group.id", PropertiesTool.getProperty("kafka.register.topic.group-id"));
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<String>(topicName, new SimpleStringSchema(), properties);
        // Kafka source
        DataStream<String> source = env.addSource(consumer).uid("kafka");
        // Convert raw messages into RegisterDto
        DataStream<RegisterDto> map = source.map(new RegisterMapFunction()).uid("map");
        // Filter: keep only today's records
        DataStream<RegisterDto> sourceData = map.filter(new RegisterFilterFunction()).uid("filter");
        // Windowed processing, keyed by platform / pvType / siteId
        DataStream<RegisterDto> register = sourceData.keyBy(new RegisterKeyBy())
                .window(TumblingProcessingTimeWindows.of(Time.days(Constant.INTEGER_ONE), Time.hours(-8)))
                .trigger(CountOrProcessingTimeTrigger.of(Time.seconds(Constant.INTEGER_ONE), 1000L))
                .aggregate(new RegisterAggregateFunction(), new RegisterWindowFunction()).uid("register");
        DataStream<RegisterDto> registerSite = sourceData.keyBy(new RegisterSiteKeyBy())
                .window(TumblingProcessingTimeWindows.of(Time.days(Constant.INTEGER_ONE), Time.hours(-8)))
                .trigger(CountOrProcessingTimeTrigger.of(Time.seconds(Constant.INTEGER_ONE), 1000L))
                .aggregate(new RegisterSiteAggregateFunction(), new RegisterSiteWindowFunction()).uid("registerSite");
        // Write to the Redis cache (siteId); the sinks are currently disabled
        /*register.addSink(new RegisterRedisSinkCustom()).uid("registerRedis");
        registerSite.addSink(new RegisterSiteRedisSinkCustom()).uid("registerSiteRedis");*/
        env.execute("registerUserNum");
    }
}
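
The two Redis sinks above are commented out, and their implementations (RegisterRedisSinkCustom / RegisterSiteRedisSinkCustom) are not included in this post, because the counters are already written to Redis inside the aggregate functions further below. For reference only, here is a minimal sketch of what such a sink could look like, reusing the RedisUtil.redisson client and the same key layout as the aggregate functions; the class name and field handling are illustrative assumptions, not the actual implementation:

package com.biaodian.flink.sink;

import com.biaodian.flink.constant.Constant;
import com.biaodian.flink.dto.RegisterDto;
import com.biaodian.flink.tool.RedisUtil;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

/**
 * Illustrative sketch only (not the original RegisterRedisSinkCustom):
 * writes the window result into a Redis hash keyed by
 * date:sourcePlatform:siteId:platform:pvType.
 */
public class RegisterRedisSinkSketch extends RichSinkFunction<RegisterDto> {
    @Override
    public void invoke(RegisterDto value, Context context) {
        String key = value.getStatisticsDateNum() + Constant.COLON +
                value.getSourcePlatform() + Constant.COLON +
                value.getSiteId() + Constant.COLON +
                value.getPlatform() + Constant.COLON +
                value.getPvType();
        // Overwrite the hash field with the count carried by the DTO
        RedisUtil.redisson.getMap(key).put(Constant.REGISTER_USER_NUM, value.getNum());
    }
}
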
package com.biaodian.flink.function.register;

import cn.hutool.core.date.DateTime;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.biaodian.flink.constant.Constant;
import com.biaodian.flink.constant.PlatFormEnum;
import com.biaodian.flink.constant.PvTypeEnum;
import com.biaodian.flink.dto.RegisterDto;
import org.apache.flink.api.common.functions.MapFunction;

public class RegisterMapFunction implements MapFunction<String, RegisterDto> {
    JSONObject jsonObject = null;
    JSONObject data = null;
    RegisterDto dto = null;
    String create_at = null;

    @Override
    public RegisterDto map(String value) throws Exception {
        dto = new RegisterDto();
        jsonObject = JSON.parseObject(value);
        data = jsonObject.getJSONObject("data");
        create_at = new DateTime(data.getDate("created_at").getTime()).toDateStr();
        dto.setSourcePlatform(jsonObject.getString("sourcePlatform"));
        dto.setPlatform(PlatFormEnum.nameValue(data.getString("platform")));
        dto.setPvType(PvTypeEnum.nameValue(data.getString("platform")));
        dto.setSiteId(data.getString("application_merchant_id"));
        dto.setEventType(data.getString("event_type"));
        dto.setUid(data.getString("user_id"));
        dto.setStatisticsDateNum(Integer.parseInt(create_at.replaceAll(Constant.UNDER_LINE, Constant.EMPTY_STRING)));
        dto.setNum(Constant.INTEGER_ONE);
        return dto;
    }
}
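
The upstream message format is not shown in the post. Based on the fields the map function reads, an assumed example payload and a quick local check could look like the following; every field value below is made up for illustration and must be adapted to the real event schema and enum definitions:

package com.biaodian.flink.function.register;

import com.biaodian.flink.dto.RegisterDto;

/**
 * Quick local check for RegisterMapFunction. The JSON layout is an assumption
 * inferred from the fields read in map(); the real message format may differ.
 */
public class RegisterMapFunctionDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical registration event; adapt field values to your PlatFormEnum/PvTypeEnum
        String message = "{"
                + "\"sourcePlatform\":\"app\","
                + "\"data\":{"
                +   "\"created_at\":\"2021-08-16 10:35:00\","
                +   "\"platform\":\"ios\","
                +   "\"application_merchant_id\":\"10001\","
                +   "\"event_type\":\"register\","
                +   "\"user_id\":\"u-123\""
                + "}}";
        RegisterDto dto = new RegisterMapFunction().map(message);
        System.out.println(dto.getStatisticsDateNum() + " " + dto.getSiteId() + " " + dto.getUid());
    }
}
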
package com.biaodian.flink.function.register;

import cn.hutool.core.date.DateUtil;
import com.biaodian.flink.constant.Constant;
import com.biaodian.flink.dto.RegisterDto;
import org.apache.flink.api.common.functions.FilterFunction;

public class RegisterFilterFunction implements FilterFunction<RegisterDto> {
    Integer dateInt = null;
    @Override
    public boolean filter(RegisterDto value) throws Exception {
        dateInt = Integer.parseInt(DateUtil.date().toDateStr().replaceAll(Constant.UNDER_LINE, Constant.EMPTY_STRING));
        return dateInt.equals(value.getStatisticsDateNum());
    }
}
package com.biaodian.flink.keyby;

import com.biaodian.flink.dto.RegisterDto;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple6;

public class RegisterKeyBy implements KeySelector<RegisterDto, Tuple6<Integer, String, String, String, String, String>> {
    @Override
    public Tuple6<Integer, String, String, String, String, String> getKey(RegisterDto value) throws Exception {
        return new Tuple6<>(value.getStatisticsDateNum(), value.getSourcePlatform(), value.getPlatform(), value.getPvType(), value.getSiteId(), value.getEventType());
    }
}
package com.biaodian.flink.tool;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.Window;

public class CountOrProcessingTimeTrigger<W extends Window> extends Trigger<Object, W> {
    private static final long serialVersionUID = 1L;

    private final long interval;
    private final long maxCount;
    /**
     * When merging we take the lowest of all fire timestamps as the new fire timestamp.
     */
    private final ReducingStateDescriptor<Long> stateDesc =
            new ReducingStateDescriptor<>("fire-time", new Min(), LongSerializer.INSTANCE);
    private final ReducingStateDescriptor<Long> countDesc =
            new ReducingStateDescriptor<>("count", new Sum(), LongSerializer.INSTANCE);

    private static class Min implements ReduceFunction<Long> {
        private static final long serialVersionUID = 1L;

        @Override
        public Long reduce(Long value1, Long value2) throws Exception {
            return Math.min(value1, value2);
        }
    }

    private static class Sum implements ReduceFunction<Long> {
        private static final long serialVersionUID = 1L;

        @Override
        public Long reduce(Long value1, Long value2) throws Exception {
            return value1 + value2;
        }
    }

    private CountOrProcessingTimeTrigger(long interval, long maxCount) {
        this.interval = interval;
        this.maxCount = maxCount;
    }

    @Override
    public TriggerResult onElement(Object element, long timestamp, W window, TriggerContext ctx)
            throws Exception {
        // No need to register a timer for the window end here; it is triggered automatically
        // count
        ReducingState<Long> count = ctx.getPartitionedState(countDesc);
        //interval
        ReducingState<Long> fireTimestamp = ctx.getPartitionedState(stateDesc);
        // Increment the counter by 1 for every element
        count.add(1L);
        if (count.get() >= maxCount) {
            // The count threshold is reached; reset the count state first
            count.clear();
            // Also remove the pending time-based timer, unless it is the end-of-window timer
            if (fireTimestamp.get() != null && fireTimestamp.get() != window.maxTimestamp()) {
                ctx.deleteProcessingTimeTimer(fireTimestamp.get());
            }
            fireTimestamp.clear();
            // Fire the window computation
            return TriggerResult.FIRE;
        }

        // After a fire, the timer for the next time-based fire is registered when the next element arrives
        timestamp = ctx.getCurrentProcessingTime();
        if (fireTimestamp.get() == null) {
            long nextFireTimestamp = timestamp + interval;
            ctx.registerProcessingTimeTimer(nextFireTimestamp);
            fireTimestamp.add(nextFireTimestamp);
        }
        return TriggerResult.CONTINUE;
    }

    @Override
    public TriggerResult onEventTime(long time, W window, TriggerContext ctx) throws Exception {
        return TriggerResult.CONTINUE;
    }

    @Override
    public TriggerResult onProcessingTime(long time, W window, TriggerContext ctx)
            throws Exception {
        // count
        ReducingState<Long> count = ctx.getPartitionedState(countDesc);
        //interval
        ReducingState<Long> fireTimestamp = ctx.getPartitionedState(stateDesc);

        // time trigger and window end
        if (time == window.maxTimestamp()) {
            count.clear();
            fireTimestamp.clear();
            fireTimestamp.add(time + interval);
            ctx.registerProcessingTimeTimer(time + interval);
            return TriggerResult.FIRE_AND_PURGE;
            // At window end: reset the count and the timer state
            /*count.clear();
            ctx.deleteProcessingTimeTimer(fireTimestamp.get());
            fireTimestamp.clear();
            return TriggerResult.FIRE_AND_PURGE;*/
        } else if (fireTimestamp.get() != null && fireTimestamp.get().equals(time)) {
            // The time-based timer fired: reset the count and the timer state
            count.clear();
            fireTimestamp.clear();
            return TriggerResult.FIRE;
        }
        return TriggerResult.CONTINUE;
    }

    @Override
    public void clear(W window, TriggerContext ctx) throws Exception {
        // State could be merged into new window.
        ReducingState<Long> fireTimestamp = ctx.getPartitionedState(stateDesc);
        Long timestamp = fireTimestamp.get();
        if (timestamp != null) {
            ctx.deleteProcessingTimeTimer(timestamp);
            fireTimestamp.clear();
        }
        ctx.getPartitionedState(countDesc).clear();
    }

    @Override
    public boolean canMerge() {
        return true;
    }

    @Override
    public void onMerge(W window, OnMergeContext ctx) throws Exception {
        // States for old windows will lose after the call.
        ctx.mergePartitionedState(stateDesc);

        // Register timer for this new window.
        Long nextFireTimestamp = ctx.getPartitionedState(stateDesc).get();
        if (nextFireTimestamp != null) {
            ctx.registerProcessingTimeTimer(nextFireTimestamp);
        }
        ctx.mergePartitionedState(countDesc);
    }

    @VisibleForTesting
    public long getInterval() {
        return interval;
    }

    @Override
    public String toString() {
        return "ContinuousProcessingTimeTrigger(" + interval + ")";
    }

    /**
     * Creates a trigger that fires either when the given processing-time interval elapses
     * or when the element count reaches the given threshold, whichever comes first.
     *
     * @param interval The time interval at which to fire.
     * @param count    The element count at which to fire.
     * @param <W>      The type of {@link Window Windows} on which this trigger can operate.
     */
    public static <W extends Window> CountOrProcessingTimeTrigger<W> of(Time interval, Long count) {
        return new CountOrProcessingTimeTrigger<>(interval.toMilliseconds(), count);
    }
}
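
Outside of this job, the trigger can be attached to any keyed processing-time window in the same way. Below is a minimal standalone sketch; the socket source, window size, and thresholds are placeholders chosen for illustration:

package com.biaodian.flink.tool;

import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

/**
 * Standalone usage sketch for CountOrProcessingTimeTrigger: counts words per
 * one-minute window, but emits an updated count every 5 seconds or every
 * 100 elements, whichever comes first.
 */
public class CountOrProcessingTimeTriggerDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.socketTextStream("localhost", 9999)
                .keyBy(word -> word)
                .window(TumblingProcessingTimeWindows.of(Time.minutes(1)))
                .trigger(CountOrProcessingTimeTrigger.of(Time.seconds(5), 100L))
                .aggregate(new AggregateFunction<String, Long, Long>() {
                    @Override public Long createAccumulator() { return 0L; }
                    @Override public Long add(String value, Long acc) { return acc + 1; }
                    @Override public Long getResult(Long acc) { return acc; }
                    @Override public Long merge(Long a, Long b) { return a + b; }
                })
                .print();
        env.execute("count-or-time-trigger-demo");
    }
}
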
package com.biaodian.flink.function.register;

import cn.hutool.core.date.DateTime;
import cn.hutool.core.date.DateUtil;
import com.biaodian.flink.constant.Constant;
import com.biaodian.flink.dto.RegisterDto;
import com.biaodian.flink.tool.RedisUtil;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.redisson.api.RBloomFilter;

import java.util.concurrent.TimeUnit;

/**
 * Aggregation of the newly registered user count
 *
 * @author 赵阳光
 * @date 2021-08-16 11:10
 */
public class RegisterAggregateFunction implements AggregateFunction<RegisterDto, Integer, Integer> {
    private RBloomFilter<Object> registerUserNum = null;
    private String dateStr = null;
    private DateTime dateTime = null;

    @Override
    public Integer createAccumulator() {
        dateTime = DateUtil.date();
        dateStr = dateTime.toDateStr().replaceAll(Constant.UNDER_LINE, Constant.EMPTY_STRING);
        registerUserNum = RedisUtil.redisson.getBloomFilter(dateStr + "registerUserNum");
        registerUserNum.tryInit(Constant.BLOOM_REGISTER_SIZE, 0.000001);
        registerUserNum.expire(Constant.BLOOM_ACTIVE_SECOND, TimeUnit.SECONDS);
        return 0;
    }

    @Override
    public Integer add(RegisterDto value, Integer accumulator) {
        String key = value.getPlatform() + value.getPvType() + value.getSiteId() + value.getEventType() + value.getUid();
        if (null == registerUserNum){
            dateTime = DateUtil.date();
            dateStr = dateTime.toDateStr().replaceAll(Constant.UNDER_LINE, Constant.EMPTY_STRING);
            registerUserNum = RedisUtil.redisson.getBloomFilter(dateStr + "registerUserNum");
            registerUserNum.tryInit(Constant.BLOOM_REGISTER_SIZE, 0.000001);
            registerUserNum.expire(Constant.BLOOM_ACTIVE_SECOND, TimeUnit.SECONDS);
        }
        boolean contains = registerUserNum.contains(key);
        if (contains) {
            return accumulator;
        }
        RedisUtil.redisson.getMap(value.getStatisticsDateNum() + Constant.COLON +
                value.getSourcePlatform() + Constant.COLON +
                value.getSiteId() + Constant.COLON +
                value.getPlatform() + Constant.COLON +
                value.getPvType()).addAndGet(Constant.REGISTER_USER_NUM,Constant.INTEGER_ONE);
        registerUserNum.add(key);
        return 1 + accumulator;
    }

    @Override
    public Integer getResult(Integer accumulator) {
        return accumulator;
    }

    @Override
    public Integer merge(Integer a, Integer b) {
        return a + b;
    }
}
package com.biaodian.flink.function.register;

import com.biaodian.flink.dto.RegisterDto;
import org.apache.flink.api.java.tuple.Tuple6;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

/**
 * Collects window output (already handled in the aggregate function, so nothing is emitted here)
 */
public class RegisterWindowFunction implements WindowFunction<Integer, RegisterDto, Tuple6<Integer, String, String, String, String, String>, TimeWindow> {

    @Override
    public void apply(Tuple6<Integer, String, String, String, String, String> tuple6, TimeWindow window, Iterable<Integer> input, Collector<RegisterDto> out) throws Exception {

    }
}
package com.biaodian.flink.function.register;

import cn.hutool.core.date.DateTime;
import cn.hutool.core.date.DateUtil;
import com.biaodian.flink.constant.Constant;
import com.biaodian.flink.dto.RegisterDto;
import com.biaodian.flink.tool.RedisUtil;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.redisson.api.RBloomFilter;

import java.util.concurrent.TimeUnit;

/**
 * Per-site aggregation of the newly registered user count
 *
 * @author 赵阳光
 * @date 2021-08-16 11:10
 */
public class RegisterSiteAggregateFunction implements AggregateFunction<RegisterDto, Integer, Integer> {
    private RBloomFilter<Object> registerSiteUserNum = null;
    private String dateStr = null;
    private DateTime dateTime = null;

    @Override
    public Integer createAccumulator() {
        dateTime = DateUtil.date();
        dateStr = dateTime.toDateStr().replaceAll(Constant.UNDER_LINE, Constant.EMPTY_STRING);
        registerSiteUserNum = RedisUtil.redisson.getBloomFilter(dateStr + "registerSiteUserNum");
        registerSiteUserNum.tryInit(Constant.BLOOM_REGISTER_SIZE, 0.000001);
        registerSiteUserNum.expire(Constant.BLOOM_ACTIVE_SECOND, TimeUnit.SECONDS);
        return 0;
    }

    @Override
    public Integer add(RegisterDto value, Integer accumulator) {
        String key = value.getSiteId() + value.getEventType() + value.getUid();
        if (null == registerSiteUserNum){
            dateTime = DateUtil.date();
            dateStr = dateTime.toDateStr().replaceAll(Constant.UNDER_LINE, Constant.EMPTY_STRING);
            registerSiteUserNum = RedisUtil.redisson.getBloomFilter(dateStr + "registerSiteUserNum");
            registerSiteUserNum.tryInit(Constant.BLOOM_REGISTER_SIZE, 0.000001);
            registerSiteUserNum.expire(Constant.BLOOM_ACTIVE_SECOND, TimeUnit.SECONDS);
        }
        boolean contains = registerSiteUserNum.contains(key);
        if (contains) {
            return accumulator;
        }
        RedisUtil.redisson.getMap(value.getStatisticsDateNum() + Constant.COLON +
                value.getSourcePlatform() + Constant.COLON +
                value.getSiteId())
                .addAndGet(Constant.REGISTER_USER_NUM,Constant.INTEGER_ONE);
        registerSiteUserNum.add(key);
        return 1 + accumulator;
    }

    @Override
    public Integer getResult(Integer accumulator) {
        return accumulator;
    }

    @Override
    public Integer merge(Integer a, Integer b) {
        return a + b;
    }
}
package com.biaodian.flink.function.register;

import com.biaodian.flink.dto.RegisterDto;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

/**
 * Collects window output (already handled in the aggregate function, so nothing is emitted here)
 */
public class RegisterSiteWindowFunction implements WindowFunction<Integer, RegisterDto, Tuple4<Integer, String, String, String>, TimeWindow> {
    @Override
    public void apply(Tuple4<Integer, String, String, String> tuple4, TimeWindow window, Iterable<Integer> input, Collector<RegisterDto> out) throws Exception {

    }
}
package com.biaodian.flink.tool;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class PropertiesTool {
    private static final Properties properties = new Properties();
    static {
        ClassLoader loader = PropertiesTool.class.getClassLoader();
        try (InputStream base = loader.getResourceAsStream("application.properties")) {
            properties.load(base);
            String profile = "dev".equals(properties.getProperty("spring.profiles.active"))
                    ? "application-dev.properties" : "application-prod.properties";
            try (InputStream env = loader.getResourceAsStream(profile)) {
                properties.load(env);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    private PropertiesTool(){}

    /**
     * Returns the value for the given key.
     *
     * @param key property key
     * @return property value, or null if the key is absent
     */
    public static String getProperty(String key){
        return properties.getProperty(key);
    }
}
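
PropertiesUtil.getProperties(), used in FlinkRegisterUser to build the Kafka consumer properties, is not included in the post. Here is a minimal sketch of what it might provide, assuming a hypothetical kafka.bootstrap.servers key in the profile properties files:

package com.biaodian.flink.tool;

import java.util.Properties;

/**
 * Illustrative sketch only; the real PropertiesUtil is not shown in this post.
 * The property key "kafka.bootstrap.servers" is an assumed name.
 */
public class PropertiesUtilSketch {
    private PropertiesUtilSketch() {}

    public static Properties getProperties() {
        Properties properties = new Properties();
        // Broker addresses come from application-{profile}.properties via PropertiesTool
        properties.setProperty("bootstrap.servers", PropertiesTool.getProperty("kafka.bootstrap.servers"));
        // group.id is set per job afterwards (see FlinkRegisterUser)
        return properties;
    }
}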
