DWS层:商品主题宽表处理并写入ClickHouse

DWS层:商品主题宽表处理

在这里插入图片描述

与访客的dws层的宽表类似,也是把多个事实表的明细数据汇总起来组合成宽表

1. 需求分析与思路

  1. 从Kafka主题中获得数据流
  2. 把Json字符串数据流转换为统一数据对象的数据流
  3. 把统一的数据结构流合并为一个流
    Flink 合并多条流使用 union 方法,前提是各条流的数据类型必须相同,所以需要先封装主题宽表的 bean 对象,把各个流都转换成该对象后再合并;后续先分组开窗聚合(订单 id 用 Set 去重),再补充维度信息。
  4. 设定事件时间与水位线
  5. 分组、开窗、聚合
  6. 写入ClickHouse

2. 功能实现

2.1 封装商品统计实体类ProductStats

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.math.BigDecimal;
import java.util.HashSet;
import java.util.Set;

/**
 * Product-topic wide-table record: the statistics of one SKU.
 *
 * Holds the window bounds, the SKU's dimension attributes, the additive
 * counters/amounts, and three helper sets of order ids whose sizes are used
 * to derive the distinct order counts.
 * Accessors and constructors are generated by the Lombok annotations below.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class ProductStats {
    
    private String stt; // window start time
    private String edt;  // window end time
    private Long sku_id; // SKU id
    private String sku_name; // SKU name
    private BigDecimal sku_price; // SKU unit price
    private Long spu_id; // SPU id
    private String spu_name; // SPU name
    private Long tm_id; // trademark (brand) id
    private String tm_name; // trademark (brand) name
    private Long category3_id; // category id
    private String category3_name; // category name
    
    private Long display_ct = 0L; // number of displays (exposures)
    private Long click_ct = 0L;  // number of clicks
    private Long favor_ct = 0L; // number of times favorited
    private Long cart_ct = 0L;  // number of add-to-cart actions
    private Long order_sku_num = 0L; // number of items ordered
    // Ordered amount for this product; NOT the amount of the whole order.
    private BigDecimal order_amount = BigDecimal.ZERO;    
    private Long order_ct = 0L; // number of orders
    
    // Paid amount.
    private BigDecimal payment_amount = BigDecimal.ZERO;    
    private Long paid_order_ct = 0L;  // number of paid orders
    private Long refund_order_ct = 0L; // number of refunded orders
    private BigDecimal refund_amount = BigDecimal.ZERO;    // refunded amount
    private Long comment_ct = 0L; // number of commented orders
    private Long good_comment_ct = 0L; // number of orders with a good review
    
    private Set<Long> orderIdSet = new HashSet<>();  // order ids, used to count orders
    private Set<Long> paidOrderIdSet = new HashSet<>(); // order ids, used to count paid orders
    private Set<Long> refundOrderIdSet = new HashSet<>(); // order ids, used to count refunded orders
   
    private Long ts; // statistics timestamp
}

2.2 消费Kafka数据, 合成一个流

 /**
  * Wires up the product-topic DWS pipeline: parse and union the source
  * streams, aggregate per SKU in windows, enrich the aggregates with
  * dimension data, print them, and write them to ClickHouse.
  *
  * @param env           the Flink execution environment (not used directly here)
  * @param sourceStreams source streams keyed by Kafka topic name
  */
 public void run(StreamExecutionEnvironment env,
                    Map<String, DataStreamSource<String>> sourceStreams) {

        // 1. Parse the eight source streams and union them into one stream.
        final DataStream<ProductStats> productStatsDataStream = parseStreamAndUnionOneStream(sourceStreams);

        // 2. Window and aggregate.
        final SingleOutputStreamOperator<ProductStats> productStatsAggStream = aggregateByDim(productStatsDataStream);

        // 3. Look up and attach the dimension data.
        final SingleOutputStreamOperator<ProductStats> productStatsWithDimStream = joinDims(productStatsAggStream);

        // Echo the enriched records to stdout.
        productStatsWithDimStream.print();

        // 4. Write the result to ClickHouse.
        sink2Clickhouse(productStatsWithDimStream);

    }

 //1. 解析成8个流,合并成一个流
    /**
     * Converts each of the eight source topics into a stream of
     * {@link ProductStats} records (each record carries the SKU id, a
     * timestamp and only the measure(s) contributed by its topic) and
     * unions them into a single stream.
     *
     * @param sourceStreams source streams keyed by Kafka topic name; must contain
     *                      dwd_page_log, dwd_display_log, dwd_favor_info, dwd_cart_info,
     *                      dwm_order_wide, dwm_payment_wide, dwd_order_refund_info
     *                      and dwd_comment_info
     * @return the union of the eight parsed streams
     */
    private DataStream<ProductStats> parseStreamAndUnionOneStream(Map<String, DataStreamSource<String>> sourceStreams) {

        // 1. Click stream: a page log whose page_id is "good_detail" counts as one click on the SKU in page.item.
        final SingleOutputStreamOperator<ProductStats> productClickStream = sourceStreams
                .get("dwd_page_log")
                .flatMap(new FlatMapFunction<String, ProductStats>() {
                    @Override
                    public void flatMap(String json,
                                        Collector<ProductStats> out) throws Exception {
                        final JSONObject obj = JSON.parseObject(json);
                        final JSONObject pageObj = obj.getJSONObject("page");
                        if (pageObj == null) {
                            // A record without a "page" object cannot be a product click; skip it instead of failing with an NPE.
                            return;
                        }

                        final String pageId = pageObj.getString("page_id");
                        if ("good_detail".equalsIgnoreCase(pageId)) {

                            final Long skuId = pageObj.getLong("item");
                            final Long ts = obj.getLong("ts");

                            final ProductStats ps = new ProductStats();
                            ps.setSku_id(skuId);
                            ps.setClick_ct(1L);
                            ps.setTs(ts);

                            out.collect(ps);
                        }
                    }
                });

        // 2. Display (exposure) stream: only display records whose item_type is "sku_id" are counted.
        final SingleOutputStreamOperator<ProductStats> displayStream = sourceStreams
                .get("dwd_display_log")
                .process(new ProcessFunction<String, ProductStats>() {
                    @Override
                    public void processElement(String json,
                                               Context ctx,
                                               Collector<ProductStats> out) throws Exception {

                        final JSONObject obj = JSON.parseObject(json);
                        final String itemType = obj.getString("item_type");

                        if ("sku_id".equalsIgnoreCase(itemType)) {

                            final ProductStats ps = new ProductStats();
                            final Long skuId = obj.getLong("item");
                            final Long ts = obj.getLong("ts");

                            ps.setSku_id(skuId);
                            ps.setTs(ts);
                            ps.setDisplay_ct(1L);

                            out.collect(ps);
                        }
                    }
                });

        // 3. Favorite stream: every record is one favorite; ts is derived from create_time via MyTimeUtil.toTs.
        final SingleOutputStreamOperator<ProductStats> favorStream = sourceStreams
                .get("dwd_favor_info")
                .map(json -> {
                    final JSONObject obj = JSON.parseObject(json);
                    final Long skuId = obj.getLong("sku_id");
                    final Long ts = MyTimeUtil.toTs(obj.getString("create_time"));

                    final ProductStats ps = new ProductStats();

                    ps.setSku_id(skuId);
                    ps.setTs(ts);
                    ps.setFavor_ct(1L);

                    return ps;
                });

        // 4. Cart stream: every record is one add-to-cart action.
        final SingleOutputStreamOperator<ProductStats> cartStream = sourceStreams
                .get("dwd_cart_info")
                .map(json -> {
                    final JSONObject obj = JSON.parseObject(json);
                    final Long skuId = obj.getLong("sku_id");
                    final Long ts = MyTimeUtil.toTs(obj.getString("create_time"));

                    final ProductStats ps = new ProductStats();

                    ps.setSku_id(skuId);
                    ps.setTs(ts);
                    ps.setCart_ct(1L);

                    return ps;
                });

        // 5. Order stream: carries the order id (for distinct counting later), the split amount and the SKU quantity.
        final SingleOutputStreamOperator<ProductStats> orderStream = sourceStreams
                .get("dwm_order_wide")
                .map(json -> {
                    final OrderWide orderWide = JSON.parseObject(json, OrderWide.class);

                    final ProductStats ps = new ProductStats();

                    ps.setSku_id(orderWide.getSku_id());
                    ps.setTs(MyTimeUtil.toTs(orderWide.getCreate_time()));

                    ps.getOrderIdSet().add(orderWide.getOrder_id());

                    ps.setOrder_amount(orderWide.getSplit_total_amount());
                    ps.setOrder_sku_num(orderWide.getSku_num());

                    return ps;
                });

        // 6. Payment stream: carries the paid order id and the split amount.
        final SingleOutputStreamOperator<ProductStats> paymentStream = sourceStreams
                .get("dwm_payment_wide")
                .map(json -> {

                    final PaymentWide paymentWide = JSON.parseObject(json, PaymentWide.class);

                    final ProductStats ps = new ProductStats();

                    ps.setSku_id(paymentWide.getSku_id());
                    ps.setTs(MyTimeUtil.toTs(paymentWide.getPayment_create_time()));

                    ps.getPaidOrderIdSet().add(paymentWide.getOrder_id());
                    ps.setPayment_amount(paymentWide.getSplit_total_amount());

                    return ps;
                });

        // 7. Refund stream: carries the refunded order id and the refund amount.
        final SingleOutputStreamOperator<ProductStats> refundStream = sourceStreams
                .get("dwd_order_refund_info")
                .map(json -> {
                    final JSONObject obj = JSON.parseObject(json);
                    final Long skuId = obj.getLong("sku_id");
                    final Long ts = MyTimeUtil.toTs(obj.getString("create_time"));

                    final BigDecimal refundAmount = obj.getBigDecimal("refund_amount");

                    final ProductStats ps = new ProductStats();

                    ps.setSku_id(skuId);
                    ps.setTs(ts);

                    ps.getRefundOrderIdSet().add(obj.getLong("order_id"));

                    ps.setRefund_amount(refundAmount);

                    return ps;
                });

        // 8. Comment stream: every record is one comment; it is also a good comment when appraise matches APPRAISE_GOOD.
        final SingleOutputStreamOperator<ProductStats> commentStream = sourceStreams
                .get("dwd_comment_info")
                .map(json -> {
                    final JSONObject obj = JSON.parseObject(json);
                    final Long skuId = obj.getLong("sku_id");
                    final Long ts = MyTimeUtil.toTs(obj.getString("create_time"));

                    final ProductStats ps = new ProductStats();

                    ps.setSku_id(skuId);
                    ps.setTs(ts);
                    ps.setComment_ct(1L);

                    final String appraise = obj.getString("appraise");
                    if (GmallConstant.APPRAISE_GOOD.equalsIgnoreCase(appraise)) {
                        ps.setGood_comment_ct(1L);
                    }

                    return ps;
                });

        return productClickStream.union(
                displayStream,
                favorStream,
                cartStream,
                orderStream,
                paymentStream,
                refundStream,
                commentStream
        );
    }

2.3 开窗, 聚合

 //2.开窗聚合
    /**
     * Aggregates the unioned records per SKU in 5-second tumbling event-time
     * windows.
     *
     * Event time is taken from {@code ProductStats.ts} with 5 seconds of allowed
     * out-of-orderness. Within a window the additive measures are summed and the
     * order-id sets are merged; when the window fires, the window bounds are
     * written to stt/edt and the three order counts are derived from the sizes
     * of the merged id sets.
     *
     * @param productStatsDataStream the unioned per-event records
     * @return one aggregated record per SKU and window
     */
    private SingleOutputStreamOperator<ProductStats> aggregateByDim(DataStream<ProductStats> productStatsDataStream) {

        final WatermarkStrategy<ProductStats> watermarks = WatermarkStrategy
                .<ProductStats>forBoundedOutOfOrderness(Duration.ofSeconds(5))
                .withTimestampAssigner((record, previousTs) -> record.getTs());

        // Incremental merge: folds the second record into the first and returns the first.
        final ReduceFunction<ProductStats> mergeMeasures = new ReduceFunction<ProductStats>() {
            @Override
            public ProductStats reduce(ProductStats acc,
                                       ProductStats next) throws Exception {

                // Behaviour counters.
                acc.setDisplay_ct(acc.getDisplay_ct() + next.getDisplay_ct());
                acc.setClick_ct(acc.getClick_ct() + next.getClick_ct());
                acc.setFavor_ct(acc.getFavor_ct() + next.getFavor_ct());
                acc.setCart_ct(acc.getCart_ct() + next.getCart_ct());

                // Comment counters.
                acc.setComment_ct(acc.getComment_ct() + next.getComment_ct());
                acc.setGood_comment_ct(acc.getGood_comment_ct() + next.getGood_comment_ct());

                // Quantities and amounts.
                acc.setOrder_sku_num(acc.getOrder_sku_num() + next.getOrder_sku_num());
                acc.setOrder_amount(acc.getOrder_amount().add(next.getOrder_amount()));
                acc.setPayment_amount(acc.getPayment_amount().add(next.getPayment_amount()));
                acc.setRefund_amount(acc.getRefund_amount().add(next.getRefund_amount()));

                // Order ids are collected in sets so each order is counted once per window.
                acc.getOrderIdSet().addAll(next.getOrderIdSet());
                acc.getPaidOrderIdSet().addAll(next.getPaidOrderIdSet());
                acc.getRefundOrderIdSet().addAll(next.getRefundOrderIdSet());

                return acc;
            }
        };

        // Window finalizer: receives the single pre-reduced record of the window.
        final ProcessWindowFunction<ProductStats, ProductStats, Long, TimeWindow> finishWindow =
                new ProcessWindowFunction<ProductStats, ProductStats, Long, TimeWindow>() {
                    @Override
                    public void process(Long key,
                                        Context ctx,
                                        Iterable<ProductStats> elements,
                                        Collector<ProductStats> out) throws Exception {

                        final TimeWindow window = ctx.window();
                        final ProductStats merged = elements.iterator().next();

                        merged.setStt(MyTimeUtil.toDateTimeString(window.getStart()));
                        merged.setEdt(MyTimeUtil.toDateTimeString(window.getEnd()));

                        final long orderCount = merged.getOrderIdSet().size();
                        final long paidOrderCount = merged.getPaidOrderIdSet().size();
                        final long refundOrderCount = merged.getRefundOrderIdSet().size();
                        merged.setOrder_ct(orderCount);
                        merged.setPaid_order_ct(paidOrderCount);
                        merged.setRefund_order_ct(refundOrderCount);

                        out.collect(merged);
                    }
                };

        return productStatsDataStream
                .assignTimestampsAndWatermarks(watermarks)
                .keyBy(ProductStats::getSku_id)
                .window(TumblingEventTimeWindows.of(Time.seconds(5)))
                .reduce(mergeMeasures, finishWindow);

    }

2.4 补充维度信息

封装异步读取维度函数
为了方便每次异步读取维度数据, 对异步函数做封装.

package com.gmall.realtime.util;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import redis.clients.jedis.Jedis;

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Collections;
import java.util.concurrent.ThreadPoolExecutor;

/**
 * Base class for asynchronous dimension enrichment.
 *
 * Each element is handed to a thread pool; the worker obtains a Redis client,
 * lets the subclass fill in dimension fields through {@link #addDim}, and then
 * completes the Flink result future with the (mutated) input element.
 * Dimension rows are read through {@link #readDim}, which uses Redis as a
 * cache in front of Phoenix.
 *
 * @param <T> type of the stream element; it is both the input and the output type
 */
public abstract class DimAsyncFunction<T> extends RichAsyncFunction<T,T> {

    /** Time-to-live of a cached dimension row in Redis: one week, in seconds. */
    private static final int CACHE_TTL_SECONDS = 60 * 60 * 24 * 7;

    private String phoenixUrl;
    // NOTE(review): this single connection is used from every pool thread — confirm the driver tolerates that.
    private Connection conn;
    private ThreadPoolExecutor pool;

    /**
     * Opens the Phoenix connection and obtains the worker thread pool.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        phoenixUrl = "jdbc:phoenix:hadoop162,hadoop163,hadoop164:2181";
        conn = DriverManager.getConnection(phoenixUrl);
        pool = MyThreadPoolUtil.getThreadPool();
    }

    /**
     * Enriches {@code input} on a pool thread and completes {@code resultFuture}.
     *
     * The Redis client is always closed, and a failure inside {@link #addDim} is
     * reported through {@code resultFuture.completeExceptionally} so the element
     * does not sit unfinished until the async timeout expires.
     *
     * @param input        the element to enrich (mutated in place)
     * @param resultFuture completed with the enriched element, or exceptionally on failure
     */
    @Override
    public void asyncInvoke(T input,
                            ResultFuture<T> resultFuture) throws Exception {
        pool.execute(() -> {
            try (Jedis redisClient = MyRedisUtil.getRedisClient()) {
                // Add the dimension information; the details are business specific.
                addDim(input, redisClient);
            } catch (Exception e) {
                resultFuture.completeExceptionally(e);
                return;
            }

            resultFuture.complete(Collections.singleton(input));
        });
    }

    /**
     * Fills the dimension fields of {@code input}; implemented per use case.
     *
     * @param input       the element to enrich
     * @param redisClient Redis client to pass on to {@link #readDim}
     */
    public  abstract  void addDim(T input, Jedis redisClient);

    /**
     * Reads one dimension row by id, from the Redis cache if present,
     * otherwise from Phoenix (and then caches it for one week).
     *
     * @param redisClient Redis client used for the cache lookup and write
     * @param tableName   dimension table name; also the prefix of the cache key
     * @param id          value matched against the table's ID column
     * @return the dimension row as a JSON object
     * @throws IndexOutOfBoundsException if the database query returns no row
     *         (from the unguarded {@code get(0)} on the query result)
     */
    public JSONObject readDim(Jedis redisClient, String tableName, Object id) {

        // Try the cache first; on a miss fall back to the database.
        final String key = tableName + ":" + id;

        // A single GET replaces EXISTS + GET: one round trip, and no window in
        // which the key can expire between the check and the read.
        final String cachedJson = redisClient.get(key);
        if (cachedJson != null) {
            System.out.println(tableName + "的 id " + id + "走缓存");
            return JSON.parseObject(cachedJson);
        }

        System.out.println(tableName + "的 id " + id + "走数据库");

        final String sql = "select * from " + tableName + " where ID = ?";
        final JSONObject jsonObject = MyJDBCUtil
                .queryList(conn, sql, new Object[]{id}, JSONObject.class, false)
                .get(0);
        // Cache the row in Redis with a one-week expiry.
        redisClient.setex(key, CACHE_TTL_SECONDS, jsonObject.toJSONString());

        return jsonObject;
    }


    /**
     * Closes the Phoenix connection and shuts the thread pool down.
     */
    @Override
    public void close() throws Exception {
        if (conn != null && !conn.isClosed()) {
            conn.close();
        }

        // NOTE(review): if MyThreadPoolUtil hands out a shared pool, this shutdown affects its other users — confirm.
        if (pool != null && !pool.isShutdown()) {
            pool.shutdown();
        }
    }
}

补充维度信息

  //3.读取维度数据
    /**
     * Enriches each aggregated record with SKU, SPU, trademark and category
     * attributes through an unordered async lookup with a 30-second timeout.
     *
     * The SKU row is read first because it supplies the SPU, trademark and
     * category ids used by the three following lookups.
     *
     * @param productStatsAggStream the windowed per-SKU aggregates
     * @return the same records with the dimension fields filled in
     */
    private SingleOutputStreamOperator<ProductStats> joinDims(SingleOutputStreamOperator<ProductStats> productStatsAggStream) {

        final DimAsyncFunction<ProductStats> dimLookup = new DimAsyncFunction<ProductStats>() {
            @Override
            public void addDim(ProductStats input, Jedis redisClient) {
                // 1. SKU row: name, price and the foreign keys of the other dimensions.
                final JSONObject sku = readDim(redisClient, "DIM_SKU_INFO", input.getSku_id());
                final Long category3Id = sku.getLong("CATEGORY3_ID");
                final Long spuId = sku.getLong("SPU_ID");
                final Long trademarkId = sku.getLong("TM_ID");

                input.setSku_name(sku.getString("SKU_NAME"));
                input.setSku_price(sku.getBigDecimal("PRICE"));
                input.setCategory3_id(category3Id);
                input.setSpu_id(spuId);
                input.setTm_id(trademarkId);

                // 2. SPU row.
                final JSONObject spu = readDim(redisClient, "DIM_SPU_INFO", spuId);
                input.setSpu_name(spu.getString("SPU_NAME"));

                // 3. Trademark row.
                final JSONObject trademark = readDim(redisClient, "DIM_BASE_TRADEMARK", trademarkId);
                input.setTm_name(trademark.getString("TM_NAME"));

                // 4. Level-3 category row.
                final JSONObject category3 = readDim(redisClient, "DIM_BASE_CATEGORY3", category3Id);
                input.setCategory3_name(category3.getString("NAME"));
            }
        };

        return AsyncDataStream.unorderedWait(productStatsAggStream, dimLookup, 30, TimeUnit.SECONDS);

    }

2.5 写入到ClickHouse

在ClickHouse中创建主题宽表

-- Product-topic wide table: one row per SKU and aggregation window.
-- ReplacingMergeTree uses ts as the version column; rows sharing the same
-- sorting key (stt, edt, sku_id) are deduplicated on merge.
use gmall2021;
create table product_stats_2021 (
   stt DateTime, -- window start time
   edt DateTime, -- window end time
   sku_id  UInt64, -- SKU id
   sku_name String, -- SKU name
   sku_price Decimal64(2), -- SKU unit price
   spu_id UInt64, -- SPU id
   spu_name String , -- SPU name
   tm_id UInt64, -- trademark (brand) id
   tm_name String, -- trademark (brand) name
   category3_id UInt64, -- category id
   category3_name String , -- category name
   display_ct UInt64, -- number of displays (exposures)
   click_ct UInt64, -- number of clicks
   favor_ct UInt64, -- number of times favorited
   cart_ct UInt64, -- number of add-to-cart actions
   order_sku_num UInt64, -- number of items ordered
   order_amount Decimal64(2), -- ordered amount for this product
   order_ct UInt64 , -- number of orders
   payment_amount Decimal64(2), -- paid amount
   paid_order_ct UInt64, -- number of paid orders
   refund_order_ct UInt64, -- number of refunded orders
   refund_amount Decimal64(2), -- refunded amount
   comment_ct UInt64, -- number of commented orders
   good_comment_ct UInt64 , -- number of orders with a good review
   ts UInt64 -- statistics timestamp; version column of the engine below
)engine =ReplacingMergeTree( ts)
        partition by  toYYYYMMDD(stt) -- one partition per day of the window start
        order by   (stt,edt,sku_id );

写数据到ClickHouse 中

/**
 * Attaches the ClickHouse sink for table {@code gmall2021.product_stats_2021}
 * to the given stream.
 *
 * @param resultStream the enriched per-SKU window aggregates to persist
 */
private void sink2Clickhouse(SingleOutputStreamOperator<ProductStats> resultStream) {
    final String database = "gmall2021";
    final String table = "product_stats_2021";

    resultStream.addSink(
            MySinkUtil.getClickHouseSink(database, table, ProductStats.class)
    );
}

MySinkUtil详见:https://blog.csdn.net/weixin_42796403/article/details/115144073

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值