Flink项目之统计网站热门商品

Flink项目之统计网站热门商品

项目介绍

每五分钟统计电商网站一小时内的热门商品

数据格式
543462,1715,1464116,pv,1511658000
543462,1715,1464116,pv,1511658000
662867,2244074,1575622,pv,1511658000
分别对应
userId,itemId,categoryId,behavior,timestamp
涉及内容

窗口、状态

相关支持

Flink 1.12.5 java 8

项目所需依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Aggregator project: packaging is "pom" and the actual code lives in the modules listed below. -->
    <groupId>org.example</groupId>
    <artifactId>com.louxun.UserBehaviorAnalysis</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>
    <modules>
        <module>HotItemsAnalysis</module>
        <module>NetworkFlowAnalysis</module>
    </modules>

    <!-- Versions referenced by the dependency declarations below. -->
    <properties>
        <flink.version>1.12.5</flink.version>
        <scala.binary.version>2.12</scala.binary.version>
    </properties>


        <!-- Dependencies declared here are inherited by every module. -->
        <dependencies>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-java</artifactId>
                <version>${flink.version}</version>
            </dependency>

            <!-- DataStream API; the artifact id carries the Scala binary version suffix. -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>

            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-clients_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>


            <!-- Lombok supplies the @Data annotation used by the entity classes. -->
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <version>1.18.20</version>
            </dependency>


        </dependencies>



</project>	
目录结构

在这里插入图片描述

编码
  • UserBehaviorEntity
package com.louxun.entity;

import lombok.Data;

/**
 * One user-behavior record, parsed from a CSV line of the form
 * {@code userId,itemId,categoryId,behavior,timestamp}.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString} are generated by
 * Lombok's {@code @Data}.
 */
@Data
public class UserBehaviorEntity {
    private Long userId;
    private Long itemId;
    private Long categoryId;
    /** Behavior type, e.g. {@code "pv"} in the sample data. */
    private String behavior;
    /**
     * Event time of the record. The job multiplies it by 1000 before using it as the event
     * timestamp, so it is presumably epoch seconds.
     */
    private Long timestamp;

    /**
     * No-argument constructor.
     *
     * <p>Flink only recognises a class as a POJO type when it has a public no-argument
     * constructor; without one the class is handled as a generic type and serialized through
     * the slower Kryo fallback.
     */
    public UserBehaviorEntity() {
    }

    /**
     * Creates a fully populated record.
     *
     * @param userId     id of the acting user
     * @param itemId     id of the item acted upon
     * @param categoryId id of the item's category
     * @param behavior   behavior type, e.g. {@code "pv"}
     * @param timestamp  event time of the record
     */
    public UserBehaviorEntity(Long userId, Long itemId, Long categoryId, String behavior, Long timestamp) {
        this.userId = userId;
        this.itemId = itemId;
        this.categoryId = categoryId;
        this.behavior = behavior;
        this.timestamp = timestamp;
    }
}

  • ItemViewCountEntiy
package com.louxun.entity;

import lombok.Data;

import java.io.Serializable;

/**
 * View count of one item for one window. Instances are created by
 * {@code WindowResultFunction} and ranked by {@code TopNHotItems}.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class ItemViewCountEntiy implements Serializable {
    // Id of the item the count belongs to.
    private Long itemId;
    // End timestamp of the window, taken from TimeWindow#getEnd().
    private Long windowEnd;
    // Number of events counted for the item in that window.
    private Long count;
}

  • HotItemsTask
package com.louxun.task;

import com.louxun.customFunction.CountAgg;
import com.louxun.customFunction.TopNHotItems;
import com.louxun.customFunction.WindowResultFunction;
import com.louxun.entity.UserBehaviorEntity;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

import java.time.Duration;

/**
 * Hot-items job: ranks items by page views over a one-hour sliding window
 * that is evaluated every five minutes.
 */
public class HotItemsTask {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "D:\\flink_demo\\UserBehaviorAnalysis\\HotItemsAnalysis\\src\\main\\resources\\UserBehavior.csv";

    /**
     * Builds and runs the pipeline: read CSV lines, parse them, assign event-time timestamps,
     * keep only "pv" events, count them per item in sliding windows, and print the top items
     * of every window.
     *
     * @param args optional; {@code args[0]} overrides the path of the input CSV file
     * @throws Exception if the job cannot be built or fails while executing
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        // Parse each line into an entity, then assign timestamps and watermarks.
        // (On Flink 1.10 this step used an AscendingTimestampExtractor instead of WatermarkStrategy.)
        SingleOutputStreamOperator<UserBehaviorEntity> map = env.readTextFile(path)
                .map(line -> {
                    String[] words = line.split(",");
                    // Long.valueOf replaces the deprecated boxing constructor new Long(String).
                    return new UserBehaviorEntity(
                            Long.valueOf(words[0]),
                            Long.valueOf(words[1]),
                            Long.valueOf(words[2]),
                            words[3],
                            Long.valueOf(words[4]));
                }).assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                // Watermark trails the highest seen timestamp by two seconds.
                                .<UserBehaviorEntity>forBoundedOutOfOrderness(Duration.ofSeconds(2))
                                // The entity timestamp is scaled by 1000 to obtain milliseconds.
                                .withTimestampAssigner((event, timestamp) -> event.getTimestamp() * 1000L)
                );

        // Drop everything except page views, then group by item.
        map.filter(entity -> "pv".equals(entity.getBehavior()))
                .keyBy(entity -> entity.getItemId())
                // Equivalent of the older .timeWindow(Time.hours(1), Time.minutes(5)) shortcut.
                .window(SlidingEventTimeWindows.of(Time.hours(1), Time.minutes(5)))
                .aggregate(new CountAgg(), new WindowResultFunction())
                // Regroup by window end so all counts of one window reach the same ranking instance.
                .keyBy(item -> item.getWindowEnd())
                .process(new TopNHotItems())
                .print();


        env.execute("hot items list");

    }


}

  • CountAgg
package com.louxun.customFunction;

import com.louxun.entity.UserBehaviorEntity;
import org.apache.flink.api.common.functions.AggregateFunction;

/**
 * Aggregate function that counts elements: the accumulator is the number of
 * {@link UserBehaviorEntity} records added so far, and the result is that same number.
 */
public class CountAgg implements AggregateFunction<UserBehaviorEntity, Long, Long> {

    /** @return a fresh accumulator holding zero */
    @Override
    public Long createAccumulator() {
        return 0L;
    }

    /**
     * Counts one more element; the element's content is not inspected.
     *
     * @param value       the element being added
     * @param accumulator the count so far
     * @return the count including {@code value}
     */
    @Override
    public Long add(UserBehaviorEntity value, Long accumulator) {
        long incremented = accumulator + 1L;
        return incremented;
    }

    /** @return the accumulated count, unchanged */
    @Override
    public Long getResult(Long accumulator) {
        return accumulator;
    }

    /** @return the sum of the two partial counts */
    @Override
    public Long merge(Long a, Long b) {
        return Long.sum(a, b);
    }
}

  • TopNHotItems
package com.louxun.customFunction;

import com.louxun.entity.ItemViewCountEntiy;
import org.apache.commons.compress.utils.Lists;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

/**
 * Buffers the per-item counts of the current key (the stream is keyed by window end) and,
 * once the event-time timer for that window fires, emits a formatted ranking of the
 * most-viewed items.
 */
public class TopNHotItems extends KeyedProcessFunction<Long, ItemViewCountEntiy, String> {

    /** Number of ranks emitted when the no-argument constructor is used. */
    private static final int DEFAULT_TOP_SIZE = 5;

    /** Maximum number of items listed per window. */
    private final int topSize;

    // Keyed state buffering every count received for the current key until the timer fires.
    // Transient because the handle is obtained in open() rather than shipped with the function.
    private transient ListState<ItemViewCountEntiy> itemState;

    /** Creates a ranking function that lists the top {@value #DEFAULT_TOP_SIZE} items. */
    public TopNHotItems() {
        this(DEFAULT_TOP_SIZE);
    }

    /**
     * Creates a ranking function that lists at most {@code topSize} items per window.
     *
     * @param topSize maximum number of items to list; must be positive
     * @throws IllegalArgumentException if {@code topSize} is not positive
     */
    public TopNHotItems(int topSize) {
        if (topSize <= 0) {
            throw new IllegalArgumentException("topSize must be positive, got " + topSize);
        }
        this.topSize = topSize;
    }

    /** Obtains the list-state handle used to buffer incoming counts. */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        itemState = getRuntimeContext().getListState(new ListStateDescriptor<ItemViewCountEntiy>("itemState-state", ItemViewCountEntiy.class));
    }

    /**
     * Buffers the count and registers an event-time timer one millisecond after the
     * window end carried by the element.
     */
    @Override
    public void processElement(ItemViewCountEntiy value, Context ctx, Collector<String> out) throws Exception {
        itemState.add(value);
        ctx.timerService().registerEventTimeTimer(value.getWindowEnd() + 1);
    }

    /**
     * Sorts the buffered counts in descending order, clears the state and emits the
     * first {@code topSize} entries (fewer if the window saw fewer items) as one string.
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
        super.onTimer(timestamp, ctx, out);

        // Copy the state into a plain list so it can be sorted after the state is cleared.
        List<ItemViewCountEntiy> list = new ArrayList<>();
        Iterable<ItemViewCountEntiy> buffered = itemState.get();
        if (buffered != null) {
            for (ItemViewCountEntiy entiy : buffered) {
                list.add(entiy);
            }
        }

        itemState.clear();

        // Highest count first; comparing the values directly avoids subtraction overflow.
        list.sort(Comparator.comparing(ItemViewCountEntiy::getCount).reversed());

        StringBuilder result = new StringBuilder();

        // A window may contain fewer distinct items than topSize, so bound by the list size.
        int limit = Math.min(topSize, list.size());
        for (int i = 0; i < limit; i++) {
            ItemViewCountEntiy entiy = list.get(i);
            result.append("No").append(i + 1).append(":")
                    .append("  商品ID=").append(entiy.getItemId())
                    .append("  浏览量=").append(entiy.getCount()).append("\n");
        }

        result.append("====================================\n\n");
        // Throttles output to imitate a live, scrolling result.
        // NOTE(review): this blocks the calling thread for one second per fired timer; demo only.
        Thread.sleep(1000);
        out.collect(result.toString());

    }
}

  • WindowResultFunction
package com.louxun.customFunction;

import com.louxun.entity.ItemViewCountEntiy;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

/**
 * Window function that wraps the aggregated count of one key and window into an
 * {@link ItemViewCountEntiy}.
 */
public class WindowResultFunction implements WindowFunction<Long, ItemViewCountEntiy, Long, TimeWindow> {

    /**
     * Emits one {@link ItemViewCountEntiy} built from the key, the first value of
     * {@code input} and the end of {@code window}.
     *
     * @param aLong  the key of the window; stored as the item id
     * @param window the window being evaluated
     * @param input  the aggregated values; only the first element is read
     * @param out    collector receiving the resulting entity
     */
    @Override
    public void apply(Long aLong, TimeWindow window, Iterable<Long> input, Collector<ItemViewCountEntiy> out) throws Exception {
        final Long viewCount = input.iterator().next();
        final long windowEnd = window.getEnd();

        final ItemViewCountEntiy summary = new ItemViewCountEntiy();
        summary.setWindowEnd(windowEnd);
        summary.setCount(viewCount);
        summary.setItemId(aLong);

        out.collect(summary);
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值