Flink综合案例实战

需求:公司开发一个监控告警平台(数据类似 nginx 访问日志)
      每 5 秒统计过去 1 分钟各个接口的访问量
      每 5 秒统计过去 1 分钟各个接口的各个状态码次数
日志来源
     访问日志存在乱序、无效访问记录
     要实时显示数据,可以每隔一段时间对结果进行二次更新
     最大允许 1 分钟延迟,超过后兜底保存
定义POJO
  
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.Date;

/**
 * Input record: one access-log entry flowing through the monitoring job.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode}/{@code toString} and both the
 * no-args and all-args constructors are generated by Lombok. The all-args constructor
 * takes the fields in declaration order.
 *
 * @Version 1.0
 **/

@Data
@NoArgsConstructor
@AllArgsConstructor
public class AccessLogDO {

    /** Human-readable name of the endpoint (the mock source uses values such as "首页"). */
    private String title;

    /** Request path of the endpoint; the job groups records by this value. */
    private String url;

    /** HTTP method, e.g. "GET" or "POST". */
    private String method;

    /** HTTP response status code. */
    private Integer httpCode;

    /** Request body; the mock source always supplies an empty string. */
    private String body;

    /** Time the access happened; the job uses it as the event-time timestamp. */
    private Date createTime;

    /** Identifier of the user who made the request. */
    private String userId;

    /** City of the request; the mock source always supplies an empty string. */
    private String city;
}
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Output record: the number of access-log entries counted for one URL in one window.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok.
 *
 * @Version 1.0
 **/
@AllArgsConstructor
@NoArgsConstructor
@Data
public class ResultCount {

    /** Request path the count belongs to. */
    private String url;

    /**
     * HTTP status code the count belongs to.
     * NOTE(review): the visible job never sets this field, so it is printed as null;
     * presumably reserved for the per-status-code statistic - confirm.
     */
    private Integer httpCode;

    /** Number of records counted in the window. */
    private Long count;

    /** Window start, as a formatted time string. */
    private String startTime;

    /** Window end, as a formatted time string. */
    private String endTime;

    /**
     * Kind of statistic.
     * NOTE(review): never set by the visible job; intended values are not shown here.
     */
    private String type;
}

模拟数据源

import net.xdclass.util.TimeUtils;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;

/**
 * Mock data source that emits one randomly generated {@link AccessLogDO} per second.
 *
 * <p>Each record is built from a randomly chosen endpoint template, a random status
 * code, a random user id in {@code [0, 50)} and an event time up to 5 seconds in the
 * past, so the resulting stream is out of order.
 *
 * <p>A new {@link AccessLogDO} is created for every emitted record. The static
 * templates are never mutated: they are shared by every parallel instance of this
 * source running in the same JVM, and a record that has already been emitted must not
 * change afterwards.
 *
 * @Version 1.0
 **/
public class AccessLogSource extends RichParallelSourceFunction<AccessLogDO> {

    /** Loop guard; cleared by {@link #cancel()}, hence volatile. */
    private volatile boolean flag = true;

    private final Random random = new Random();


    // Endpoint templates; only title, url, method, body and city are read from them.
    private static final List<AccessLogDO> urlList = new ArrayList<>();
    static {
        urlList.add(new AccessLogDO("首页","/pub/api/v1/web/index_card","GET",200,"",new Date(),"",""));
        urlList.add(new AccessLogDO("个人信息","/pub/api/v1/web/user_info","GET",200,"",new Date(),"",""));
        // Additional sample endpoints, disabled to keep the demo output small.
//        urlList.add(new AccessLogDO("分类列表","/pub/api/v1/web/all_category","GET",200,"",new Date(),"",""));
//        urlList.add(new AccessLogDO("分页视频","/pub/api/v1/web/page_video","GET",200,"",new Date(),"",""));
//        urlList.add(new AccessLogDO("收藏","/user/api/v1/favorite/save","POST",200,"",new Date(),"",""));
//        urlList.add(new AccessLogDO("下单","/user/api/v1/product/order/save","POST",200,"",new Date(),"",""));
//        urlList.add(new AccessLogDO("异常url","","POST",200,"",new Date(),"",""));
    }


    // Status codes to draw from; 200 is listed three times so it is picked more often.
    private static final List<Integer> codeList = new ArrayList<>();
    static {
        codeList.add(200);
        codeList.add(200);
        codeList.add(200);
        codeList.add(502);
        codeList.add(403);
    }



    /**
     * Emits one record per second until {@link #cancel()} is called.
     *
     * @param ctx context used to emit records
     * @throws Exception if the sleeping thread is interrupted
     */
    @Override
    public void run(SourceContext<AccessLogDO> ctx) throws Exception {

        while (flag) {

            Thread.sleep(1000);
            int userId = random.nextInt(50);
            int httpCodeNum = random.nextInt(codeList.size());
            int accessLogNum = random.nextInt(urlList.size());
            AccessLogDO template = urlList.get(accessLogNum);

            // Shift the event time back by up to 5 seconds to simulate out-of-order logs.
            long timestamp = System.currentTimeMillis() - random.nextInt(5000);

            // Copy the template instead of mutating it, so every emitted record is an
            // independent object and parallel subtasks do not race on shared state.
            AccessLogDO accessLogDO = new AccessLogDO(
                    template.getTitle(),
                    template.getUrl(),
                    template.getMethod(),
                    codeList.get(httpCodeNum),
                    template.getBody(),
                    new Date(timestamp),
                    userId + "",
                    template.getCity());

            System.out.println("产生:"+accessLogDO.getTitle()+",状态码:"+accessLogDO.getHttpCode()+", 时间:"+ TimeUtils.format(accessLogDO.getCreateTime()));
            ctx.collect(accessLogDO);
        }
    }

    /** Asks the emit loop in {@link #run} to stop after the current iteration. */
    @Override
    public void cancel() {
        flag = false;
    }
}

// job main 类

import net.xdclass.util.TimeUtils;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.time.Duration;

/**
 * Flink job that counts access-log records per URL over a 60-second event-time window
 * sliding every 5 seconds, and prints one {@link ResultCount} per URL and window.
 *
 * <p>Records may arrive up to 3 seconds out of order before the watermark passes them.
 * Windows stay open for a further 60 seconds of allowed lateness and emit an updated
 * result when a late record arrives. Records that are later than that go to a side
 * output, which is printed as the fallback.
 */
public class MonitorApp {

    /**
     * Builds and runs the job.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<AccessLogDO> ds = env.addSource(new AccessLogSource());

        // Filter: drop invalid records, then keep only non-200 responses.
        // NOTE(review): keeping only non-200 responses means the printed "access volume"
        // is really an error count per URL - confirm this is the intended statistic.
        SingleOutputStreamOperator<AccessLogDO> filterDS = ds.filter(new FilterFunction<AccessLogDO>() {
            @Override
            public boolean filter(AccessLogDO accessLogDO) throws Exception {
                if (accessLogDO == null) {
                    return false;
                }
                // A null URL cannot be used as a key, and a null createTime would fail
                // in the timestamp assigner; an empty URL is treated as an invalid record.
                String url = accessLogDO.getUrl();
                if (url == null || url.isEmpty() || accessLogDO.getCreateTime() == null) {
                    return false;
                }
                Integer httpCode = accessLogDO.getHttpCode();
                return httpCode != null && httpCode.intValue() != 200;
            }
        });

        // Assign event-time timestamps and watermarks.
        final SingleOutputStreamOperator<AccessLogDO> watermarksDS = filterDS.assignTimestampsAndWatermarks(
                // Tolerate records that are up to 3 seconds out of order.
                WatermarkStrategy.<AccessLogDO>forBoundedOutOfOrderness(Duration.ofSeconds(3))
                        // Event time is the log's createTime.
                        .withTimestampAssigner((event, timestamp) -> event.getCreateTime().getTime()));

        // Group by URL.
        final KeyedStream<AccessLogDO, String> keyByDS = watermarksDS.keyBy(new KeySelector<AccessLogDO, String>() {
            @Override
            public String getKey(AccessLogDO accessLogDO) throws Exception {
                return accessLogDO.getUrl();
            }
        });

        // Tag for records that arrive after the allowed lateness has expired.
        // Declared as an anonymous subclass so the element type is retained.
        OutputTag<AccessLogDO> outdata = new OutputTag<AccessLogDO>("lateDataLog") {
        };

        // Windowing.
        WindowedStream<AccessLogDO, String, TimeWindow> windowedStream =
                // Every 5 seconds, cover the past 60 seconds.
                keyByDS.window(SlidingEventTimeWindows.of(Time.seconds(60), Time.seconds(5)))
                // Keep each window for 60 more seconds so late records update its result.
                .allowedLateness(Time.seconds(60))
                // Anything later than that is routed to the side output.
                .sideOutputLateData(outdata);


        // Aggregate: count incrementally, then attach the key and window bounds.
        final SingleOutputStreamOperator<ResultCount> aggregate = windowedStream.aggregate(new AggregateFunction<AccessLogDO, Long, Long>() {
            @Override
            public Long createAccumulator() {
                return 0L;
            }

            @Override
            public Long add(AccessLogDO accessLogDO, Long aLong) {
                return aLong + 1;
            }

            @Override
            public Long getResult(Long aLong) {
                return aLong;
            }

            @Override
            public Long merge(Long a, Long b) {
                return a + b;
            }
        }, new ProcessWindowFunction<Long, ResultCount, String, TimeWindow>() {
            @Override
            public void process(String value, Context context, Iterable<Long> elements, Collector<ResultCount> out) throws Exception {
                ResultCount resultCount = new ResultCount();
                resultCount.setUrl(value);
                resultCount.setStartTime(TimeUtils.format(context.window().getStart()));
                resultCount.setEndTime(TimeUtils.format(context.window().getEnd()));
                // The iterable holds exactly one element: the pre-aggregated count.
                long total = elements.iterator().next();
                resultCount.setCount(total);
                out.collect(resultCount);
            }
        });

        aggregate.print("实时1分钟接口访问量");

        // Fallback for records that missed every window: read the side output so they
        // are not silently dropped. Printed here; replace with a real sink as needed.
        aggregate.getSideOutput(outdata).print("超过允许延迟的兜底数据");

        try {
            env.execute("Monitor App");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

执行main方法

产生:个人信息,状态码:403, 时间:2023-01-14 22:47:29
产生:首页,状态码:200, 时间:2023-01-14 22:47:30
产生:首页,状态码:502, 时间:2023-01-14 22:47:32
产生:首页,状态码:502, 时间:2023-01-14 22:47:34
实时1分钟接口访问量> ResultCount(url=/pub/api/v1/web/user_info, httpCode=null, count=1, startTime=2023-01-14 22:46:30, endTime=2023-01-14 22:47:30, type=null)
产生:个人信息,状态码:200, 时间:2023-01-14 22:47:35
产生:个人信息,状态码:200, 时间:2023-01-14 22:47:37
产生:个人信息,状态码:403, 时间:2023-01-14 22:47:34
产生:首页,状态码:502, 时间:2023-01-14 22:47:35
产生:首页,状态码:200, 时间:2023-01-14 22:47:37
产生:个人信息,状态码:200, 时间:2023-01-14 22:47:37
产生:个人信息,状态码:403, 时间:2023-01-14 22:47:42
实时1分钟接口访问量> ResultCount(url=/pub/api/v1/web/user_info, httpCode=null, count=2, startTime=2023-01-14 22:46:35, endTime=2023-01-14 22:47:35, type=null)
实时1分钟接口访问量> ResultCount(url=/pub/api/v1/web/index_card, httpCode=null, count=2, startTime=2023-01-14 22:46:35, endTime=2023-01-14 22:47:35, type=null)
产生:首页,状态码:502, 时间:2023-01-14 22:47:41
产生:个人信息,状态码:200, 时间:2023-01-14 22:47:39
产生:首页,状态码:200, 时间:2023-01-14 22:47:42
产生:首页,状态码:200, 时间:2023-01-14 22:47:43
产生:首页,状态码:403, 时间:2023-01-14 22:47:43
实时1分钟接口访问量> ResultCount(url=/pub/api/v1/web/user_info, httpCode=null, count=2, startTime=2023-01-14 22:46:40, endTime=2023-01-14 22:47:40, type=null)
实时1分钟接口访问量> ResultCount(url=/pub/api/v1/web/index_card, httpCode=null, count=3, startTime=2023-01-14 22:46:40, endTime=2023-01-14 22:47:40, type=null)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值