基于Flink的个人装扮商城群体用户画像与数据实时统计系统(五)-需求集B实现

最新推荐文章于 2021-04-08 14:35:24 发布

珞清殇

最新推荐文章于 2021-04-08 14:35:24 发布

阅读量370

点赞数

分类专栏：基于Flink的个人装扮商城群体用户画像与数据实时统计系统文章标签：大数据 flink

本文链接：https://blog.csdn.net/qq_44992559/article/details/115435243

版权

基于Flink的个人装扮商城群体用户画像与数据实时统计系统专栏收录该内容

9 篇文章 6 订阅

订阅专栏

一、需求集B有什么？

所有需求link：基于Flink的个人装扮商城群体用户画像与数据实时统计系统(二)-项目介绍与需求介绍

需求集B是针对模拟生成的用户浏览商品的信息提出的，包括：

群体用户画像之当日实时品牌偏好
- 实时包品牌偏好
- 实时服装品牌偏好
- 实时鞋品牌偏好
各类产品(包类、服装类、鞋类)近一分钟浏览次数统计，每10s统计一次(超喜欢的需求)
群体用户画像之当日实时终端偏好

附：模拟生成的用户浏览商品的信息字段
在这里插入图片描述

二、模拟生成用户浏览商品的信息

用户浏览商品的信息实体类编写：ScanProductInfo

package cn.edu.neu.bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * One product-browsing ("scan") event of a user.
 *
 * Lombok generates the getters/setters, the no-args constructor and an all-args
 * constructor whose parameter order follows the field declaration order below.
 *
 * @author 32098
 */
@AllArgsConstructor
@NoArgsConstructor
@Data
public class ScanProductInfo {
    /**
     * User ID
     */
    private String userId;
    /**
     * Product ID
     */
    private String productId;
    /**
     * Product category
     */
    private String productType;
    /**
     * Brand of the browsed product
     */
    private String brand;
    /**
     * Time at which the user started browsing the product
     */
    private long beginScanTime;
    /**
     * Time the user stayed on the product, in milliseconds
     */
    private long stayTime;
    /**
     * Type of terminal (client) used for browsing
     */
    private String useType;
}

用户浏览商品的信息模拟生成：ScanProductInfoSource

package cn.edu.neu.source;

import cn.edu.neu.bean.ScanProductInfo;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

import java.util.Arrays;
import java.util.List;
import java.util.Random;

/**
 * Parallel Flink source that keeps emitting randomly generated {@link ScanProductInfo}
 * browsing events until it is cancelled.
 *
 * @author 32098
 */
public class ScanProductInfoSource extends RichParallelSourceFunction<ScanProductInfo> {
    /** Product categories; the entry at index i owns the brand list at index i of {@link #BRANDS_OF_PRODUCT_TYPE}. */
    private static final List<String> PRODUCT_TYPES = Arrays.asList("Clothes", "Shoes", "Bags");

    /** Terminal types, indexed as used by {@link #pickUseType(Random)}. */
    private static final List<String> USE_TYPES = Arrays.asList("PC端", "小程序端", "移动端");

    /**
     * Brands per product category, in the same order as {@link #PRODUCT_TYPES}.
     * "鸿星尔克ERKE" is listed twice in the shoes list, so it is drawn twice as often; kept as in the original data.
     */
    private static final List<List<String>> BRANDS_OF_PRODUCT_TYPE = Arrays.asList(
            Arrays.asList("UNIQLO优衣库", "PEACEBIRD太平鸟", "每依站", "LEDIN乐町", "VEROMODA"),
            Arrays.asList("LINING李宁", "鸿星尔克ERKE", "ANTA安踏", "Nike耐克", "XTEP特步", "NewBalance", "鸿星尔克ERKE", "361°"),
            Arrays.asList("LV路易威登", "Prada普拉达", "Hermes爱马仕", "Gucci古驰", "Armani阿玛尼", "BALLY巴利", "Fendi芬迪", "纪梵希")
    );

    /**
     * Loop flag of {@link #run}. It is cleared by {@link #cancel()}, which Flink invokes from a
     * different thread than the one executing {@code run}, hence it must be volatile for the
     * change to become visible to the emitting loop.
     */
    private volatile boolean keepMockData;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        keepMockData = true;
    }

    /**
     * Emits one initial event and then, until cancelled, bursts of events separated by a random
     * pause of 100-2000 ms.
     *
     * @param sourceContext context used to emit the generated events
     * @throws Exception if the thread is interrupted while pausing between bursts
     */
    @Override
    public void run(SourceContext<ScanProductInfo> sourceContext) throws Exception {
        Random r = new Random();

        // The first event fixes the initial category/brand that later events may keep.
        String userId = RandomStringUtils.randomNumeric(3);
        String productId = RandomStringUtils.randomAlphabetic(6);
        int productTypeIdx = r.nextInt(PRODUCT_TYPES.size());
        String productType = PRODUCT_TYPES.get(productTypeIdx);
        String brand = pickBrand(r, productTypeIdx);
        long stayTime = r.nextInt(5000) + 500;
        long scanBeginTime = System.currentTimeMillis();
        sourceContext.collect(new ScanProductInfo(userId, productId, productType, brand, scanBeginTime, stayTime, pickUseType(r)));

        while (keepMockData) {
            // NOTE(review): the bound is re-drawn on every iteration, which skews bursts towards
            // small sizes; kept unchanged so the generated load stays the same.
            for (int i = 0; i < r.nextInt(20); i++) {
                userId = RandomStringUtils.randomNumeric(3);
                productId = RandomStringUtils.randomAlphabetic(6);

                // With probability 0.2 switch to a freshly drawn category and brand;
                // otherwise the previous event's category and brand are reused.
                if (r.nextDouble() < 0.2) {
                    productTypeIdx = r.nextInt(PRODUCT_TYPES.size());
                    productType = PRODUCT_TYPES.get(productTypeIdx);
                    brand = pickBrand(r, productTypeIdx);
                }

                stayTime = r.nextInt(5000) + 500;
                // The browse is assumed to end "now", so it started stayTime ms ago.
                scanBeginTime = System.currentTimeMillis() - stayTime;

                sourceContext.collect(new ScanProductInfo(userId, productId, productType, brand, scanBeginTime, stayTime, pickUseType(r)));
            }
            Thread.sleep((r.nextInt(20) + 1) * 100);
        }
    }

    /** Stops the emitting loop of {@link #run} after the current burst/pause. */
    @Override
    public void cancel() {
        keepMockData = false;
    }

    /** Draws a random brand from the brand list of the given category index. */
    private static String pickBrand(Random r, int productTypeIdx) {
        List<String> brandsOfCurProType = BRANDS_OF_PRODUCT_TYPE.get(productTypeIdx);
        return brandsOfCurProType.get(r.nextInt(brandsOfCurProType.size()));
    }

    /** Draws a terminal type: index 2 for prob &gt; 0.45, index 1 for prob &gt; 0.18, index 0 otherwise. */
    private static String pickUseType(Random r) {
        double prob = r.nextDouble();
        if (prob > 0.45) {
            return USE_TYPES.get(2);
        }
        if (prob > 0.18) {
            return USE_TYPES.get(1);
        }
        return USE_TYPES.get(0);
    }
}

编写 FlinkKafkaProducer，模拟用户浏览商品的信息的采集

package cn.edu.neu.kafka;

import cn.edu.neu.bean.ScanProductInfo;
import cn.edu.neu.source.ScanProductInfoSource;
import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

import java.util.Properties;

/**
 * Job that simulates the collection of browsing events: it serialises the events produced by
 * {@link ScanProductInfoSource} to JSON and writes them to the Kafka topic {@code flink_kafka}.
 *
 * @author 32098
 */
public class ScanProductInfoKafkaProducer {
    public static void main(String[] args) throws Exception {
        // Kafka connection settings for the sink
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "master:9092");

        // Streaming environment and the mock browsing-event source
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<ScanProductInfo> scanEvents = env.addSource(new ScanProductInfoSource());

        // Each event becomes one JSON string
        SingleOutputStreamOperator<String> scanEventsAsJson = scanEvents.map(
                new MapFunction<ScanProductInfo, String>() {
                    @Override
                    public String map(ScanProductInfo scanEvent) throws Exception {
                        return JSON.toJSONString(scanEvent);
                    }
                });

        // Publish the JSON strings to Kafka
        scanEventsAsJson.addSink(
                new FlinkKafkaProducer<String>("flink_kafka", new SimpleStringSchema(), kafkaProps));

        env.execute();
    }
}

三、需求集B实现

群体用户画像之当日实时品牌偏好

package cn.edu.neu.task.windowTask;

import cn.edu.neu.bean.ScanProductInfo;
import cn.edu.neu.bean.Statics;
import cn.edu.neu.sink.StaticsSink;
import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.ArrayList;
import java.util.Properties;

/**
 * Group user portrait: real-time brand preference.
 *
 * Reads browsing events (JSON) from the Kafka topic {@code flink_kafka}, counts browses per
 * (product category, brand) inside a one-day tumbling processing-time window and emits the
 * running counts every 10 seconds to {@link StaticsSink}.
 *
 * @author 32098
 */
public class BrandLikeTask {
    /** Separator between the statistic name and the brand inside the flattened String key. */
    private static final String KEY_SEPARATOR = "#";

    public static void main(String[] args) {
        Properties pros = new Properties();
        pros.setProperty("bootstrap.servers", "master:9092");
        pros.setProperty("group.id", "flink");
        pros.setProperty("auto.offset.reset","latest");
        pros.setProperty("flink.partition-discovery.interval-millis","5000");
        pros.setProperty("enable.auto.commit", "true");
        pros.setProperty("auto.commit.interval.ms", "2000");

        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<String>(
                "flink_kafka",
                new SimpleStringSchema(),
                pros
        );
        kafkaSource.setStartFromLatest();

        // 1. env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. source
        DataStreamSource<String> kafkaDataStream = env.addSource(kafkaSource);

        // 3. transformation
        // JSON string -> ScanProductInfo
        SingleOutputStreamOperator<ScanProductInfo> scanProductInfoDataStream = kafkaDataStream.map(new MapFunction<String, ScanProductInfo>() {
            @Override
            public ScanProductInfo map(String s) throws Exception {
                return JSON.parseObject(s, ScanProductInfo.class);
            }
        });

        // Why a flattened String key: an earlier version keyed the stream by a nested
        // Tuple2<String, String> through a lambda and failed while building the job with
        //   InvalidTypesException: The generic type parameters of 'Tuple2' are missing.
        // (lambdas do not carry enough generic type information for Flink's type extraction).
        // Joining statistic name and brand into one String gives a key Flink can type on its own.
        //
        // NOTE(review): processing-time day windows are aligned to the epoch, not to the local
        // calendar day; confirm whether an offset is needed for "current day" semantics.
        SingleOutputStreamOperator<Statics> resultDs = scanProductInfoDataStream.map(new MapFunction<ScanProductInfo, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(ScanProductInfo scanProductInfo) throws Exception {
                String productType = scanProductInfo.getProductType();
                String brand = scanProductInfo.getBrand();
                return Tuple2.of("brandLikeOf" + productType + KEY_SEPARATOR + brand, 1L);
            }
        }).keyBy(e -> e.f0).window(TumblingProcessingTimeWindows.of(Time.days(1))).trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(10))).reduce(new ReduceFunction<Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> reduce(Tuple2<String, Long> t1, Tuple2<String, Long> t2) throws Exception {
                return Tuple2.of(t1.f0, t1.f1+t2.f1);
            }
        }).map(new MapFunction<Tuple2<String, Long>, Statics>() {
            @Override
            public Statics map(Tuple2<String, Long> tuple) throws Exception {
                // Limit 2: split only at the first separator so that a brand containing '#'
                // stays intact and an empty brand still yields two parts.
                String[] staticNameAndDetail = tuple.f0.split(KEY_SEPARATOR, 2);
                return new Statics(staticNameAndDetail[0], staticNameAndDetail[1], tuple.f1);
            }
        });

        resultDs.addSink(new StaticsSink());

        try {
            // Job name previously read "useType analysis", copied from the terminal-preference job.
            env.execute("brandLike analysis");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

该需求实现设置了1天的滚动窗口且设置了每10s触发一次计算的触发器。

各类产品(包类、服装类、鞋类)近一分钟浏览次数统计，每10s统计一次(超喜欢的需求)

package cn.edu.neu.bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Result row of the per-category browse count: one count for a product category and a
 * time label. Lombok generates accessors, a no-args constructor and an all-args constructor
 * (parameter order = field order).
 *
 * @author 32098
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class ProductTypeScanCountStatic {
    // Product category the count belongs to
    private String productType;
    // Time label of the count, kept as a String
    private String dealtTime;
    // Number of browses counted
    private Long scanCount;
}

package cn.edu.neu.task.windowTask;

import cn.edu.neu.bean.ProductTypeScanCountStatic;
import cn.edu.neu.bean.ScanProductInfo;
import cn.edu.neu.sink.ProTypeScanStaticSink;
import com.alibaba.fastjson.JSON;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.Date;
import java.util.Properties;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Browse count per product category over a sliding event-time window (60 s size, 10 s slide),
 * implemented with the Flink Table/SQL API and written to {@link ProTypeScanStaticSink}.
 *
 * @author 32098
 */
public class ScanProductNearlyMinuteCountTask {
    /**
     * Row type registered as the SQL view {@code temp}: the event timestamp, its 10-second
     * bucket label, the product category and a constant count of 1 per event.
     */
    @AllArgsConstructor
    @NoArgsConstructor
    @Data
    public static class TimeProductTypeCount{
        private Long eventTime;
        private String dealtTime;
        private String productType;
        private long scanCount;
    }

    /**
     * Maps a timestamp to the label of the 10-second bucket it falls into, i.e. the next
     * 10-second boundary strictly after the (second-truncated) time:
     *
     *         seconds 00-09 => :10
     *         seconds 10-19 => :20
     *         ...
     *         seconds 50-59 => :00 of the next minute
     *
     * The roll-over into the next minute, hour and day is handled by formatting the rounded
     * timestamp itself, so 12:59:55 yields 13:00:00 (the previous hand-written carry logic
     * produced 13:60:00 because the minute was never reset).
     *
     * @param ts timestamp in epoch milliseconds
     * @return bucket label formatted as HH:mm:ss in the JVM default time zone, e.g. 12:12:12 -> 12:12:20
     */
    private static String timeProcess(long ts){
        // Assumes the zone offset is a multiple of 10 s, so epoch-aligned 10 s boundaries
        // coincide with local-time 10 s boundaries.
        long nextBoundary = (Math.floorDiv(ts, 10_000L) + 1) * 10_000L;
        return new SimpleDateFormat("HH:mm:ss").format(new Date(nextBoundary));
    }

    public static void main(String[] args) {
        Properties pros = new Properties();
        pros.setProperty("bootstrap.servers", "master:9092");
        pros.setProperty("group.id", "flink");
        pros.setProperty("auto.offset.reset","latest");
        pros.setProperty("flink.partition-discovery.interval-millis","5000");
        pros.setProperty("enable.auto.commit", "true");
        pros.setProperty("auto.commit.interval.ms", "2000");

        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<String>(
                "flink_kafka",
                new SimpleStringSchema(),
                pros
        );
        kafkaSource.setStartFromLatest();

        // 1. env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // 2. source
        DataStreamSource<String> kafkaDataStream = env.addSource(kafkaSource);

        // 3. transformation
        // JSON string -> ScanProductInfo
        SingleOutputStreamOperator<ScanProductInfo> scanProductInfoDataStream = kafkaDataStream.map(new MapFunction<String, ScanProductInfo>() {
            @Override
            public ScanProductInfo map(String s) throws Exception {
                return JSON.parseObject(s, ScanProductInfo.class);
            }
        });

        // Event time = browse start time; watermarks tolerate up to 3 s of out-of-order events.
        SingleOutputStreamOperator<ScanProductInfo> wateredProductScanDs = scanProductInfoDataStream.assignTimestampsAndWatermarks(
                WatermarkStrategy.<ScanProductInfo>forBoundedOutOfOrderness(Duration.ofSeconds(3)).withTimestampAssigner((scanProductInfo, timeStamp) -> scanProductInfo.getBeginScanTime())
        );

        // One row per event: (event time, 10 s bucket label, category, 1).
        SingleOutputStreamOperator<TimeProductTypeCount> dealtProductScanDs = wateredProductScanDs.map(new MapFunction<ScanProductInfo, TimeProductTypeCount>() {
            @Override
            public TimeProductTypeCount map(ScanProductInfo scanProductInfo) throws Exception {
                long beginScanTime = scanProductInfo.getBeginScanTime();
                return new TimeProductTypeCount(beginScanTime, timeProcess(beginScanTime), scanProductInfo.getProductType(), 1L);
            }
        });

        // eventTime has to be declared as the rowtime attribute; with a plain BIGINT column the
        // query below is rejected with:
        //   Cannot apply '$HOP' to arguments of type '$HOP(<BIGINT>, <INTERVAL SECOND>, <INTERVAL SECOND>)'.
        //   Supported form(s): '$HOP(<DATETIME>, <DATETIME_INTERVAL>, <DATETIME_INTERVAL>)'
        tEnv.createTemporaryView("temp", dealtProductScanDs, $("eventTime").rowtime(), $("dealtTime"), $("productType"), $("scanCount"));

        // NOTE(review): dealtTime (a 10 s bucket label) is a grouping key next to the HOP window,
        // so each emitted row is the count of one 10 s bucket inside a window rather than the
        // total of the whole 60 s window - confirm this is the intended granularity.
        Table queryResultTable = tEnv.sqlQuery(
                "SELECT productType, dealtTime, count(scanCount) as scanCount FROM temp GROUP BY productType, dealtTime, HOP(eventTime, interval '10' SECOND, interval '60' SECOND)"
        );

        // Keep only the "add" messages of the retract stream (f0 == true) and unwrap the row.
        DataStream<ProductTypeScanCountStatic> resultDs = tEnv.toRetractStream(queryResultTable, ProductTypeScanCountStatic.class).filter(e->e.f0).map(e->e.f1);

        resultDs.addSink(new ProTypeScanStaticSink());

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

该需求实现以开始浏览时间作为事件时间，并设置了允许最多 3s 乱序的水位线。除此之外，采用 Flink 的 Table/SQL API 实现滑动窗口：以 60s 为窗口大小、10s 为滑动步长，从而每 10s 统计一次各类商品近一分钟的浏览次数。

群体用户画像之当日实时终端偏好

package cn.edu.neu.task.windowTask;

import cn.edu.neu.bean.ScanProductInfo;
import cn.edu.neu.bean.Statics;
import cn.edu.neu.sink.StaticsSink;
import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Properties;

/**
 * Group user portrait: real-time terminal preference.
 *
 * Consumes browsing events (JSON) from the Kafka topic {@code flink_kafka}, counts them per
 * terminal type inside a one-day tumbling processing-time window, and pushes the running
 * counts to {@link StaticsSink} every 10 seconds.
 *
 * @author 32098
 */
public class UseTypeTask {
    public static void main(String[] args) {
        // Execution environment, single parallel instance
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Kafka consumer that starts from the latest offsets
        Properties consumerProps = new Properties();
        consumerProps.setProperty("bootstrap.servers", "master:9092");
        consumerProps.setProperty("group.id", "flink");
        consumerProps.setProperty("auto.offset.reset","latest");
        consumerProps.setProperty("flink.partition-discovery.interval-millis","5000");
        consumerProps.setProperty("enable.auto.commit", "true");
        consumerProps.setProperty("auto.commit.interval.ms", "2000");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<String>("flink_kafka", new SimpleStringSchema(), consumerProps);
        consumer.setStartFromLatest();

        DataStreamSource<String> rawJson = env.addSource(consumer);

        // Stage 1: JSON string -> ScanProductInfo
        SingleOutputStreamOperator<ScanProductInfo> scanEvents = rawJson.map(
                new MapFunction<String, ScanProductInfo>() {
                    @Override
                    public ScanProductInfo map(String json) throws Exception {
                        return JSON.parseObject(json, ScanProductInfo.class);
                    }
                });

        // Stage 2: every event contributes (terminal type, 1)
        SingleOutputStreamOperator<Tuple2<String, Long>> onePerEvent = scanEvents.map(
                new MapFunction<ScanProductInfo, Tuple2<String, Long>>() {
                    @Override
                    public Tuple2<String, Long> map(ScanProductInfo event) throws Exception {
                        return Tuple2.of(event.getUseType(), 1L);
                    }
                });

        // Stage 3: sum per terminal type within the day window, firing every 10 s
        SingleOutputStreamOperator<Tuple2<String, Long>> countPerUseType = onePerEvent
                .keyBy(pair -> pair.f0)
                .window(TumblingProcessingTimeWindows.of(Time.days(1)))
                .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(10)))
                .reduce(new ReduceFunction<Tuple2<String, Long>>() {
                    @Override
                    public Tuple2<String, Long> reduce(Tuple2<String, Long> left, Tuple2<String, Long> right) throws Exception {
                        long total = left.f1 + right.f1;
                        return Tuple2.of(left.f0, total);
                    }
                });

        // Stage 4: wrap the counts into the generic Statics record
        SingleOutputStreamOperator<Statics> resultDs = countPerUseType.map(
                new MapFunction<Tuple2<String, Long>, Statics>() {
                    @Override
                    public Statics map(Tuple2<String, Long> useTypeCount) throws Exception {
                        return new Statics("useType", useTypeCount.f0, useTypeCount.f1);
                    }
                });

        resultDs.addSink(new StaticsSink());

        try {
            env.execute("useType analysis");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

该需求实现同样设置了1天的滚动窗口且设置了每10s触发一次计算的触发器。

附：

上述需求实现涉及的Sink类：ProTypeScanStaticSink

package cn.edu.neu.sink;

import cn.edu.neu.bean.ProductTypeScanCountStatic;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.text.SimpleDateFormat;

/**
 * JDBC sink that upserts {@link ProductTypeScanCountStatic} rows into the MySQL table
 * {@code product_type_scan_count}. When a row arrives after a pause, the table is emptied
 * first so that it only holds the rows of the most recent burst of results.
 *
 * @author 32098
 */
public class ProTypeScanStaticSink extends RichSinkFunction<ProductTypeScanCountStatic> {
    /** Pause between two writes (in ms) above which the next write is treated as a new burst. */
    private static final long NEW_BURST_GAP_MS = 5000L;

    /** Wall-clock time (epoch ms) of the previous write, or of {@link #open} before the first write. */
    private long lastInvokeTime = 0;

    private Connection conn = null;
    private PreparedStatement ps = null;

    /**
     * Opens the JDBC connection and prepares the upsert statement.
     *
     * @param parameters Flink configuration (not read here)
     * @throws Exception if the connection cannot be established or the statement cannot be prepared
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // NOTE(review): URL and credentials are hard-coded; consider supplying them via configuration.
        conn = DriverManager.getConnection("jdbc:mysql://master:3306/user_portrait", "root", "Hive@2020");

        // Parameter 4 repeats the count for the ON DUPLICATE KEY UPDATE branch.
        String sql = "insert into product_type_scan_count(product_type, dealt_time, scan_count) values (?,?,?) on duplicate key update scan_count=?";
        ps = conn.prepareStatement(sql);

        lastInvokeTime = System.currentTimeMillis();
    }

    /**
     * Writes one result row; empties the table first if more than 5 s passed since the last write.
     *
     * @param value   row to upsert
     * @param context sink context (not used)
     * @throws Exception if a SQL statement fails
     */
    @Override
    public void invoke(ProductTypeScanCountStatic value, Context context) throws Exception {
        long invokeTime = System.currentTimeMillis();
        // Compare elapsed milliseconds. The previous check subtracted seconds-of-minute values,
        // which turns negative across a minute boundary (e.g. :58 -> :08) and missed the pause.
        if (invokeTime - lastInvokeTime > NEW_BURST_GAP_MS) {
            String sqlDelete = "delete from product_type_scan_count";
            // try-with-resources: the delete statement was previously never closed.
            try (PreparedStatement psDelete = conn.prepareStatement(sqlDelete)) {
                psDelete.executeUpdate();
            }
        }
        ps.setString(1, value.getProductType());
        ps.setString(2, value.getDealtTime());
        ps.setLong(3, value.getScanCount());
        ps.setLong(4, value.getScanCount());
        ps.executeUpdate();
        lastInvokeTime = System.currentTimeMillis();
    }

    /**
     * Releases the statement and then the connection; the connection is closed even if closing
     * the statement fails.
     *
     * @throws Exception if closing a JDBC resource fails
     */
    @Override
    public void close() throws Exception {
        try {
            if (ps != null) {
                ps.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}

Mysql数据库表：

-- Result table of the per-category browse count, written by ProTypeScanStaticSink.
-- The sink uses INSERT ... ON DUPLICATE KEY UPDATE, which relies on the composite
-- primary key (product_type, dealt_time) declared below.
CREATE TABLE `user_portrait`.`product_type_scan_count`  (
  -- product category
  `product_type` varchar(12) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  -- time label of the count, stored as text
  `dealt_time` varchar(10) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  -- number of browses counted for (product_type, dealt_time)
  `scan_count` bigint(12) NULL DEFAULT NULL,
  PRIMARY KEY (`product_type`, `dealt_time`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

下文链接：基于Flink的个人装扮商城群体用户画像与数据实时统计系统(六)-需求集C实现
上文链接：基于Flink的个人装扮商城群体用户画像与数据实时统计系统(四)-需求集A实现

珞清殇

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
基于Flink的个人装扮商城群体用户画像与数据实时统计系统(五)-需求集B实现

一、需求集B有什么？所有需求link：基于Flink的个人装扮商城群体用户画像与数据实时统计系统(二)-项目介绍与需求介绍需求集B是针对模拟生成的用户浏览商品的信息提出的，包括：群体用户画像之当日实时品牌偏好实时包品牌偏好实时服装品牌偏好实时鞋品牌偏好各类产品(包类、服装类、鞋类)近一分钟浏览次数统计，每10s统计一次(超喜欢的需求)群体用户画像之当日实时终端偏好附：模拟生成的用户浏览商品的信息字段二、用户浏览商品信息模拟生成updating…...
复制链接

扫一扫