【博学谷学习记录】超强总结,用心分享|狂野大数据课程【DWB层构建】的总结分析

DWB层构建

DWB 层作用

基于业务进行降维,就是将DWD业务上关联密切的表进行提起的join,形成宽表,这样以后可以避免大量的join操作,直接基于宽表进行分析即可

在这里插入图片描述

订单明细宽表

  • 1、寻找业务关联表

    订单事实表:        fact_shop_order
    订单组表:          fact_shop_order_group
    订单组支付信息表:   fact_order_pay
    商品快照表:        fact_shop_order_goods_details
    订单副表(订单详情表):          fact_shop_order_address_detail
    退款订单表:        fact_refund_order
    结算信息表:        fact_order_settle
    订单评价表:        fact_goods_evaluation
    订单配送表:       fact_order_delievery_item
    
  • 2、理清表字段之间关系

在这里插入图片描述

  • 3、筛选业务表关键字段,建立目标表

    #原则:先排除分析中一定不会要的字段,剩下的原则是宁滥勿缺
    
    DROP TABLE if EXISTS yp_dwb.dwb_order_detail;
    CREATE TABLE yp_dwb.dwb_order_detail(
        -- 订单事实表
      order_id string COMMENT '根据一定规则生成的订单编号',
      order_num string COMMENT '订单序号',
      buyer_id string COMMENT '买家的userId',
      store_id string COMMENT '店铺的id',
      order_from string COMMENT '渠道类型:android、ios、miniapp、pcweb、other',
      order_state int COMMENT '订单状态:1.已下单\; 2.已付款, 3. 已确认 \;4.配送\; 5.已完成\; 6.退款\;7.已取消',
      create_date string COMMENT '下单时间',
      finnshed_time timestamp COMMENT '订单完成时间,当配送员点击确认送达时,进行更新订单完成时间,后期需要根据订单完成时间,进行自动收货以及自动评价',
      is_settlement tinyint COMMENT '是否结算\;0.待结算订单\; 1.已结算订单\;',
      is_delete tinyint COMMENT '订单删除的状态:0.未删除\;  1.已删除\;(默认0)',
      evaluation_state tinyint COMMENT '订单评价的状态:0.未评价\;  1.已评价\;(默认0)',
      way string COMMENT '取货方式:SELF自提\;SHOP店铺负责配送',
      is_stock_up int COMMENT '是否需要备货 0:不需要    1:需要    2:平台确认备货  3:已完成备货 4平台已经将货物送至店铺 ',
    --  订单副表
      order_amount decimal(36,2) COMMENT '订单总金额:购买总金额-优惠金额',
      discount_amount decimal(36,2) COMMENT '优惠金额',
      goods_amount decimal(36,2) COMMENT '用户购买的商品的总金额+运费',
      is_delivery string COMMENT '0.自提;1.配送',
      buyer_notes string COMMENT '买家备注留言',
      pay_time string,
      receive_time string,
      delivery_begin_time string,
      arrive_store_time string,
      arrive_time string COMMENT '订单完成时间,当配送员点击确认送达时,进行更新订单完成时间,后期需要根据订单完成时间,进行自动收货以及自动评价',
      create_user string,
      create_time string,
      update_user string,
      update_time string,
      is_valid tinyint COMMENT '是否有效  0: false\; 1: true\;   订单是否有效的标志',
    --  订单组
      group_id string COMMENT '订单分组id',
      is_pay tinyint COMMENT '订单组是否已支付,0未支付,1已支付',
    --  订单组支付
      group_pay_amount decimal(36,2) COMMENT '订单总金额\;',
    --  退款单
      refund_id string COMMENT '退款单号',
      apply_date string COMMENT '用户申请退款的时间',
      refund_reason string COMMENT '买家退款原因',
      refund_amount decimal(36,2) COMMENT '订单退款的金额',
      refund_state tinyint COMMENT '1.申请退款\;2.拒绝退款\; 3.同意退款,配送员配送\; 4:商家同意退款,用户亲自送货 \;5.退款完成',
    --  结算单
      settle_id string COMMENT '结算单号',
      settlement_amount decimal(36,2) COMMENT '如果发生退款,则结算的金额 = 订单的总金额 - 退款的金额',
      dispatcher_user_id string COMMENT '配送员id',
      dispatcher_money decimal(36,2) COMMENT '配送员的配送费(配送员的运费(如果退货方式为1:则买家支付配送费))',
      circle_master_user_id string COMMENT '圈主id',
      circle_master_money decimal(36,2) COMMENT '圈主分润的金额',
      plat_fee decimal(36,2) COMMENT '平台应得的分润',
      store_money decimal(36,2) COMMENT '商家应得的订单金额',
      status tinyint COMMENT '0.待结算;1.待审核 \; 2.完成结算;3.拒绝结算',
      settle_time string COMMENT ' 结算时间',
    -- 订单评价
      evaluation_id string,
      evaluation_user_id string COMMENT '评论人id',
      geval_scores int COMMENT '综合评分',
      geval_scores_speed int COMMENT '送货速度评分0-5分(配送评分)',
      geval_scores_service int COMMENT '服务评分0-5分',
      geval_isanony tinyint COMMENT '0-匿名评价,1-非匿名',
      evaluation_time string,
    -- 订单配送
      delievery_id string COMMENT '主键id',
      dispatcher_order_state tinyint COMMENT '配送订单状态:0.待接单.1.已接单,2.已到店.3.配送中 4.商家普通提货码完成订单.5.商家万能提货码完成订单。6,买家完成订单',
      delivery_fee decimal(36,2) COMMENT '配送员的运费',
      distance int COMMENT '配送距离',
      dispatcher_code string COMMENT '收货码',
      receiver_name string COMMENT '收货人姓名',
      receiver_phone string COMMENT '收货人电话',
      sender_name string COMMENT '发货人姓名',
      sender_phone string COMMENT '发货人电话',
      delievery_create_time string,
    -- 商品快照
      order_goods_id string COMMENT '--商品快照id',
      goods_id string COMMENT '购买商品的id',
      buy_num int COMMENT '购买商品的数量',
      goods_price decimal(36,2) COMMENT '购买商品的价格',
      total_price decimal(36,2) COMMENT '购买商品的价格 = 商品的数量 * 商品的单价 ',
      goods_name string COMMENT '商品的名称',
      goods_specification string COMMENT '商品规格',
      goods_type string COMMENT '商品分类     ytgj:进口商品    ytsc:普通商品     hots爆品',
      goods_brokerage decimal(36,2) COMMENT '商家设置的商品分润的金额',
      is_goods_refund tinyint COMMENT '0.不退款\; 1.退款'
    )
    COMMENT '订单明细表'
    PARTITIONED BY(dt STRING)
    row format delimited fields terminated by '\t'
    stored as orc tblproperties ('orc.compress' = 'SNAPPY');
    
  • 4、编写SQL实现

    #如果宽表查询出来的结果有问题,可能有以下原因:
    1、你的SQL有问题,尤其是on条件问题
    2、你的上一层数据有问题(你要去上上一层查看)
    
    

    – 方式1-没有使用with语句

    -- 动态分区
    SET hive.exec.dynamic.partition=true;
    SET hive.exec.dynamic.partition.mode=nonstrict;
    
    set hive.exec.max.dynamic.partitions.pernode=10000;  -- 每一台主机分区的最大个数
    set hive.exec.max.dynamic.partitions=100000;         -- 总分区的最大个数
    set hive.exec.max.created.files=150000;              -- 总分区的最大个数
    
    -- hive压缩
    set hive.exec.compress.intermediate=true;
    set hive.exec.compress.output=true;
    
    -- 写入时压缩生效
    set hive.exec.orc.compression.strategy=COMPRESSION;
    -- 谓词下推
    insert into yp_dwb.dwb_order_detail partition (dt)
    select
    -- 订单事实表
    so.id as order_id,
    so.order_num,
    so.buyer_id,
    so.store_id,
    so.order_from,
    so.order_state,
    so.create_date,
    so.finnshed_time,
    so.is_settlement,
    so.is_delete,
    so.evaluation_state,
    so.way,
    so.is_stock_up,
    
    
    -- 订单副表
    soad.order_amount,
    soad.discount_amount,
    soad.goods_amount,
    soad.is_delivery,
    soad.buyer_notes,
    soad.pay_time,
    soad.receive_time,
    soad.delivery_begin_time,
    soad.arrive_store_time,
    soad.arrive_time,
    soad.create_user,
    soad.create_time,
    soad.update_user,
    soad.update_time,
    soad.is_valid,
    
    --  订单组表
    sog.group_id,
    sog.is_pay,
    
    --  订单组支付
    op.order_pay_amount as group_pay_amount,
    --  退款单
    ro.id as refund_id,
    ro.apply_date,
    ro.refund_reason,
    ro.refund_amount,
    ro.refund_state,
    --  结算表
    os.id as settle_id,
    os.settlement_amount,
    os.dispatcher_user_id,
    os.dispatcher_money,
    os.circle_master_user_id,
    os.circle_master_money,
    os.plat_fee,
    os.store_money,
    os.status,
    os.settle_time,
    -- 订单评价
    ge.id as evaluation_id,
    ge.user_id as evaluation_user_id,
    ge.geval_scores,
    ge.geval_scores_speed,
    ge.geval_scores_service,
    ge.geval_isanony,
    ge.create_time as evaluation_time,
    -- 订单配送
    odi.id as delievery_id,
    odi.dispatcher_order_state,
    odi.delivery_fee,
    odi.distance,
    odi.dispatcher_code,
    odi.receiver_name,
    odi.receiver_phone,
    odi.sender_name,
    odi.sender_phone,
    odi.create_time as delievery_create_time,
    -- 商品快照
    sogd.id as order_goods_id,
    sogd.goods_id,
    sogd.buy_num,
    sogd.goods_price,
    sogd.total_price,
    sogd.goods_name,
    sogd.goods_specification,
    sogd.goods_type,
    sogd.goods_brokerage,
    sogd.is_refund as is_goods_refund,
    substr(so.create_time,1,10) as dt
    from (select * from yp_dwd.fact_shop_order where end_date = '9999-99-99' and is_valid = 1) so
         left join (select * from yp_dwd.fact_shop_order_group where end_date = '9999-99-99' and is_valid = 1 ) sog  on so.id = sog.order_id
         left join (select * from yp_dwd.fact_order_pay where end_date = '9999-99-99' and is_valid = 1 ) op  on sog.group_id = op.group_id
         left join (select * from yp_dwd.fact_shop_order_goods_details where end_date = '9999-99-99' and is_valid = 1 ) sogd  on so.id = sogd.order_id
         left join (select * from yp_dwd.fact_shop_order_address_detail where end_date = '9999-99-99' and is_valid = 1 ) soad  on so.id = soad.id
         left join (select * from yp_dwd.fact_refund_order where end_date = '9999-99-99' and is_valid = 1 ) ro  on so.id = ro.order_id
         left join (select * from yp_dwd.fact_order_settle where end_date = '9999-99-99' and is_valid = 1 ) os  on so.id = os.order_id
         left join (select * from yp_dwd.fact_goods_evaluation where  is_valid = 1 ) ge  on so.id = ge.order_id
         left join (select * from yp_dwd.fact_order_delievery_item where  is_valid = 1 ) odi  on so.id = odi.shop_order_id;
    
    

    – 方式2-使用with语句

    #注意,这个是实例代码,代码还没有写完,期待补充完整
    with  so as (
        select
         -- 订单事实表
        id ,
        order_num,
        buyer_id,
        store_id,
        order_from,
        order_state,
        create_date,
        finnshed_time,
        is_settlement,
        is_delete,
        evaluation_state,
        way,
        is_stock_up
        from yp_dwd.fact_shop_order
        where end_date = '9999-99-99' and is_valid = 1
    ),
    soad as (
        select
        -- 订单副表
        id,
        order_amount,
        discount_amount,
        goods_amount,
        is_delivery,
        buyer_notes,
        pay_time,
        receive_time,
        delivery_begin_time,
        arrive_store_time,
        arrive_time,
        create_user,
        create_time,
        update_user,
        update_time,
        is_valid
        from yp_dwd.fact_shop_order_address_detail
        where end_date = '9999-99-99' and is_valid = 1
    ),
    sog as (
        select
         order_id,
         group_id,
         is_pay
        from yp_dwd.fact_shop_order_group
        where end_date = '9999-99-99' and is_valid = 1
    )
    
    select
     -- 订单事实表
    so.id as order_id,
    so.order_num,
    so.buyer_id,
    so.store_id,
    so.order_from,
    so.order_state,
    so.create_date,
    so.finnshed_time,
    so.is_settlement,
    so.is_delete,
    so.evaluation_state,
    so.way,
    so.is_stock_up,
    
    
    -- 订单副表
    soad.order_amount,
    soad.discount_amount,
    soad.goods_amount,
    soad.is_delivery,
    soad.buyer_notes,
    soad.pay_time,
    soad.receive_time,
    soad.delivery_begin_time,
    soad.arrive_store_time,
    soad.arrive_time,
    soad.create_user,
    soad.create_time,
    soad.update_user,
    soad.update_time,
    soad.is_valid,
    
    --  订单组表
    sog.group_id,
    sog.is_pay
    from  so
          left join   soad on so.id = soad.id
          left join   sog  on so.id = sog.order_id;
    
    

店铺明细宽表

  • 1、寻找业务关联表

    店铺表: dim_store
    商圈表: dim_trade_area
    地址信息表: dim_location
    行政区域表: dim_district
    
  • 2、理清表字段之间关系

    [外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-U8lyoHlA-1669378314454)(笔记-新零售项目-DWB层构建.assets/image-20221110201552623.png)]

  • 3、筛选业务表关键字段,建立目标表

    DROP TABLE if EXISTS yp_dwb.dwb_shop_detail;
    CREATE TABLE yp_dwb.dwb_shop_detail(
    --  店铺
      id string, 
      address_info string COMMENT '店铺详细地址',  
      store_name string COMMENT '店铺名称', 
      is_pay_bond tinyint COMMENT '是否有交过保证金 1:是0:否', 
      trade_area_id string COMMENT '归属商圈ID', 
      delivery_method tinyint COMMENT '配送方式  1 :自提 ;3 :自提加配送均可\; 2 : 商家配送', 
      store_type int COMMENT '店铺类型 22天街网店 23实体店 24直营店铺 33会员专区店', 
      is_primary tinyint COMMENT '是否是总店 1: 是 2: 不是', 
      parent_store_id string COMMENT '父级店铺的id,只有当is_primary类型为2时有效', 
    --  商圈
      trade_area_name string COMMENT '商圈名称',
    -- 行政区域
      province_id string COMMENT '店铺所在省份ID', 
      city_id string COMMENT '店铺所在城市ID', 
      area_id string COMMENT '店铺所在县ID', 
      province_name string COMMENT '省份名称', 
      city_name string COMMENT '城市名称', 
      area_name string COMMENT '县名称'
      )
    COMMENT '店铺明细表'
    PARTITIONED BY(dt STRING)
    row format delimited fields terminated by '\t' 
    stored as orc  tblproperties ('orc.compress' = 'SNAPPY');
    
  • 4、编写SQL实现

    --动态分区配置
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=10000;
    set hive.exec.max.dynamic.partitions=100000;
    set hive.exec.max.created.files=150000;
    --hive压缩
    set hive.exec.compress.intermediate=true;
    set hive.exec.compress.output=true;
    --写入时压缩生效
    set hive.exec.orc.compression.strategy=COMPRESSION;
    
    insert into yp_dwb.dwb_shop_detail partition (dt)
    select
       -- 店铺表
        s.id,
        s.address_info,
        s.name as store_name,
        s.is_pay_bond,
        s.trade_area_id,
        s.delivery_method,
        s.store_type,
        s.is_primary,
        s.parent_store_id,
         -- 商圈表
        ta.name as trade_area_name,
    
        -- 行政区域表
        d3.id as province_id,
        d2.id as city_id,
        d1.id as area_id,
        d3.name as province_name,
        d2.name as city_name,
        d1.name as area_name,
        substring(s.create_time,1,10) as dt
    from (select * from yp_dwd.dim_store where  end_date = '9999-99-99') s
         left join  (select * from yp_dwd.dim_trade_area where  end_date = '9999-99-99')  ta on s.trade_area_id = ta.id
         left  join (select * from yp_dwd.dim_location where  end_date = '9999-99-99')  l on s.id =  l.correlation_id
         left  join yp_dwd.dim_district  d1  on l.adcode = d1.id  -- 区 / 县
         left  join yp_dwd.dim_district  d2  on d1.pid = d2.id    -- 市
         left  join yp_dwd.dim_district  d3  on d2.pid = d3.id;   -- 省
    

商品明细宽表

  • 1、寻找业务关联表

    商品SKU表:dim_goods           SPU ----》(ipone X)    SKU -----》(128G的白色的ipone X)
    商品分类表:dim_goods_class
    品牌表:   dim_brand
    
  • 2、理清表字段之间关系

    [外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-OKOTfTAY-1669378314454)(笔记-新零售项目-DWB层构建.assets/image-20221112140100879.png)]

  • 3、筛选业务表关键字段,建立目标表

    DROP TABLE if EXISTS yp_dwb.dwb_goods_detail;
    CREATE TABLE yp_dwb.dwb_goods_detail(
       -- 商品SKU表:dim_goods 
      id string, 
      store_id string COMMENT '所属商店ID', 
      class_id string COMMENT '分类id:只保存最后一层分类id', 
      store_class_id string COMMENT '店铺分类id', 
      brand_id string COMMENT '品牌id', 
      goods_name string COMMENT '商品名称', 
      goods_specification string COMMENT '商品规格', 
      search_name string COMMENT '模糊搜索名称字段:名称_+真实名称', 
      goods_sort int COMMENT '商品排序', 
      goods_market_price decimal(36,2) COMMENT '商品市场价', 
      goods_price decimal(36,2) COMMENT '商品销售价格(原价)', 
      goods_promotion_price decimal(36,2) COMMENT '商品促销价格(售价)', 
      goods_storage int COMMENT '商品库存', 
      goods_limit_num int COMMENT '购买限制数量', 
      goods_unit string COMMENT '计量单位', 
      goods_state tinyint COMMENT '商品状态 1正常,2下架,3违规(禁售)', 
      goods_verify tinyint COMMENT '商品审核状态: 1通过,2未通过,3审核中', 
      activity_type tinyint COMMENT '活动类型:0无活动1促销2秒杀3折扣', 
      discount int COMMENT '商品折扣(%)', 
      seckill_begin_time string COMMENT '秒杀开始时间', 
      seckill_end_time string COMMENT '秒杀结束时间', 
      seckill_total_pay_num int COMMENT '已秒杀数量', 
      seckill_total_num int COMMENT '秒杀总数限制', 
      seckill_price decimal(36,2) COMMENT '秒杀价格', 
      top_it tinyint COMMENT '商品置顶:1-是,0-否', 
      create_user string, 
      create_time string, 
      update_user string, 
      update_time string, 
      is_valid tinyint COMMENT '0 :失效,1 :开启', 
    --  商品小类 -三级分类
      min_class_id string COMMENT '分类id:只保存最后一层分类id', 
      min_class_name string COMMENT '店铺内分类名字', 
    --  商品中类 - 二级分类
      mid_class_id string COMMENT '分类id:只保存最后一层分类id', 
      mid_class_name string COMMENT '店铺内分类名字', 
    --  商品大类 - 一级分类
      max_class_id string COMMENT '分类id:只保存最后一层分类id', 
      max_class_name string COMMENT '店铺内分类名字', 
    --  品牌
      brand_name string COMMENT '品牌名称'
      )
    COMMENT '商品明细表'
    PARTITIONED BY(dt STRING)
    row format delimited fields terminated by '\t' 
    stored as orc 
    tblproperties ('orc.compress' = 'SNAPPY');
    
  • 4、编写SQL实现

    --动态分区配置
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=10000;
    set hive.exec.max.dynamic.partitions=100000;
    set hive.exec.max.created.files=150000;
    --hive压缩
    set hive.exec.compress.intermediate=true;
    set hive.exec.compress.output=true;
    --写入时压缩生效
    set hive.exec.orc.compression.strategy=COMPRESSION;
    
    -- 开启map端join
    set hive.auto.convert.join=true;  -- 开启Map Join的支持, 默认就是True
    set hive.auto.convert.join.noconditionaltask.size=512000000; -- 小表的最大阈值
    
    -- 使用于中表join大表
    set hive.optimize.bucketmapjoin = true;
    
    insert into yp_dwb.dwb_goods_detail partition (dt)
    select
    -- 商品表
        g.id,
        g.store_id,
        g.class_id,
        g.store_class_id,
        g.brand_id,
        g.goods_name,
        g.goods_specification,
        g.search_name,
        g.goods_sort,
        g.goods_market_price,
        g.goods_price,
        g.goods_promotion_price,
        g.goods_storage,
        g.goods_limit_num,
        g.goods_unit,
        g.goods_state,
        g.goods_verify,
        g.activity_type,
        g.discount,
        g.seckill_begin_time,
        g.seckill_end_time,
        g.seckill_total_pay_num,
        g.seckill_total_num,
        g.seckill_price,
        g.top_it,
        g.create_user,
        g.create_time,
        g.update_user,
        g.update_time,
        g.is_valid,
    
        -- 商品分类表
        case when c1.level = 3
              then c1.id
            else null
        end as  min_class_id,
    
        case when c1.level = 3
              then c1.name
            else null
        end as min_class_name,
        -- 中类
        case when c1.level = 2
              then c1.id
             when c2.level = 2
              then c2.id
             else null
        end as mid_class_id,
       case when c1.level = 2
              then c1.name
             when c2.level = 2
              then c2.name
             else null
        end as mid_class_name,
    
       -- 大类
        case when c1.level = 1
                then c1.id
             when c2.level = 1
                then c2.id
             when c3.level = 1
                then c3.id
             else null
       end  as max_class_id,
    
       case when c1.level = 1
                then c1.name
             when c2.level = 1
                then c2.name
             when c3.level = 1
                then c3.name
             else null
       end  as max_class_name,
       --- 品牌表
        b.brand_name,
    
        substring(g.create_time,1,10) as dt
    from (select * from yp_dwd.dim_goods where end_date = '9999-99-99') g
        left join (select * from yp_dwd.dim_brand  where end_date = '9999-99-99') b on g.brand_id = b.id
        left join (select * from yp_dwd.dim_goods_class  where end_date = '9999-99-99') c1  on g.store_class_id = c1.id
        left join (select * from yp_dwd.dim_goods_class  where end_date = '9999-99-99') c2 on c1.parent_id = c2.id
        left join (select * from yp_dwd.dim_goods_class  where end_date = '9999-99-99') c3 on c2.parent_id = c3.id;
    
    
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值