文章目录
一、概述
入仓时机:业务数据、日志数据都同步到HDFS后。即Sqoop同步业务全量表完成后。
业务数据进入HDFS时机:全量数据每天凌晨将昨天的数据同步到HDFS昨天目录。增量数据实时同步到HDFS到binlog对应的日期目录。
日志数据进入HDFS时机:实时同步到日志行为发生的日期,JSON串中的ts字段解析出来的日期。
二、数据装载(HDFS -> Hive)
2.1 创建Hive表
共28张表:16张业务全量表、11张业务增量表、1张流量增量表。
注意1:Hive表默认是TextFile格式。
注意2:省份表和地区表为非分区表。
业务全量表:HDFS文件以 /t 分割属性字段。
业务增量表:HDFS文件是JSON串。
流量增量表:HDFS文件是JSON串。
2.1.1 业务全量表建表语句
DROP TABLE IF EXISTS ods_sku_info_full;
CREATE EXTERNAL TABLE ods_sku_info_full(
`id` STRING COMMENT 'skuId',
`spu_id` STRING COMMENT 'spuid',
`price` DECIMAL(16,2) COMMENT '价格',
`sku_name` STRING COMMENT '商品名称',
`sku_desc` STRING COMMENT '商品描述',
`weight` DECIMAL(16,2) COMMENT '重量',
`tm_id` STRING COMMENT '品牌id',
`category3_id` STRING COMMENT '品类id',
`is_sale` STRING COMMENT '是否在售',
`create_time` STRING COMMENT '创建时间'
) COMMENT 'SKU商品表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_sku_info_full/';
DROP TABLE IF EXISTS ods_base_category1_full;
CREATE EXTERNAL TABLE ods_base_category1_full(
`id` STRING COMMENT 'id',
`name` STRING COMMENT '名称'
) COMMENT '商品一级分类表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_category1_full/';
DROP TABLE IF EXISTS ods_base_category2_full;
CREATE EXTERNAL TABLE ods_base_category2_full(
`id` STRING COMMENT ' id',
`name` STRING COMMENT '名称',
`category1_id` STRING COMMENT '一级品类id'
) COMMENT '商品二级分类表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_category2_full/';
DROP TABLE IF EXISTS ods_base_category3_full;
CREATE EXTERNAL TABLE ods_base_category3_full(
`id` STRING COMMENT ' id',
`name` STRING COMMENT '名称',
`category2_id` STRING COMMENT '二级品类id'
) COMMENT '商品三级分类表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_category3_full/';
DROP TABLE IF EXISTS ods_base_province;
CREATE EXTERNAL TABLE ods_base_province (
`id` STRING COMMENT '编号',
`name` STRING COMMENT '省份名称',
`region_id` STRING COMMENT '地区ID',
`area_code` STRING COMMENT '地区编码',
`iso_code` STRING COMMENT 'ISO-3166编码,供可视化使用',
`iso_3166_2` STRING COMMENT 'IOS-3166-2编码,供可视化使用'
) COMMENT '省份表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_province/';
DROP TABLE IF EXISTS ods_base_trademark_full;
CREATE EXTERNAL TABLE ods_base_trademark_full (
`id` STRING COMMENT '编号',
`tm_name` STRING COMMENT '品牌名称'
) COMMENT '品牌表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_trademark_full/';
DROP TABLE IF EXISTS ods_spu_info_full;
CREATE EXTERNAL TABLE ods_spu_info_full(
`id` STRING COMMENT 'spuid',
`spu_name` STRING COMMENT 'spu名称',
`category3_id` STRING COMMENT '品类id',
`tm_id` STRING COMMENT '品牌id'
) COMMENT 'SPU商品表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_spu_info_full/';
DROP TABLE IF EXISTS ods_favor_info_full;
CREATE EXTERNAL TABLE ods_favor_info_full(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT 'skuid',
`spu_id` STRING COMMENT 'spuid',
`is_cancel` STRING COMMENT '是否取消',
`create_time` STRING COMMENT '收藏时间',
`cancel_time` STRING COMMENT '取消时间'
) COMMENT '商品收藏表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_favor_info_full/';
DROP TABLE IF EXISTS ods_cart_info_full;
CREATE EXTERNAL TABLE ods_cart_info_full(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT 'skuid',
`cart_price` DECIMAL(16,2) COMMENT '放入购物车时价格',
`sku_num` BIGINT COMMENT '数量',
`sku_name` STRING COMMENT 'sku名称 (冗余)',
`create_time` STRING COMMENT '创建时间',
`operate_time` STRING COMMENT '修改时间',
`is_ordered` STRING COMMENT '是否已经下单',
`order_time` STRING COMMENT '下单时间',
`source_type` STRING COMMENT '来源类型',
`source_id` STRING COMMENT '来源编号'
) COMMENT '加购表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_cart_info_full/';
DROP TABLE IF EXISTS ods_coupon_info_full;
CREATE EXTERNAL TABLE ods_coupon_info_full(
`id` STRING COMMENT '购物券编号',
`coupon_name` STRING COMMENT '购物券名称',
`coupon_type` STRING COMMENT '购物券类型 1 现金券 2 折扣券 3 满减券 4 满件打折券',
`condition_amount` DECIMAL(16,2) COMMENT '满额数',
`condition_num` BIGINT COMMENT '满件数',
`activity_id` STRING COMMENT '活动编号',
`benefit_amount` DECIMAL(16,2) COMMENT '减金额',
`benefit_discount` DECIMAL(16,2) COMMENT '折扣',
`create_time` STRING COMMENT '创建时间',
`range_type` STRING COMMENT '范围类型 1、商品 2、品类 3、品牌',
`limit_num` BIGINT COMMENT '最多领用次数',
`taken_count` BIGINT COMMENT '已领用次数',
`start_time` STRING COMMENT '开始领取时间',
`end_time` STRING COMMENT '结束领取时间',
`operate_time` STRING COMMENT '修改时间',
`expire_time` STRING COMMENT '过期时间'
) COMMENT '优惠券表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_coupon_info_full/';
DROP TABLE IF EXISTS ods_activity_info_full;
CREATE EXTERNAL TABLE ods_activity_info_full(
`id` STRING COMMENT '编号',
`activity_name` STRING COMMENT '活动名称',
`activity_type` STRING COMMENT '活动类型',
`start_time` STRING COMMENT '开始时间',
`end_time` STRING COMMENT '结束时间',
`create_time` STRING COMMENT '创建时间'
) COMMENT '活动信息表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_activity_info_full/';
DROP TABLE IF EXISTS ods_activity_rule_full;
CREATE EXTERNAL TABLE ods_activity_rule_full(
`id` STRING COMMENT '编号',
`activity_id` STRING COMMENT '活动ID',
`activity_type` STRING COMMENT '活动类型',
`condition_amount` DECIMAL(16,2) COMMENT '满减金额',
`condition_num` BIGINT COMMENT '满减件数',
`benefit_amount` DECIMAL(16,2) COMMENT '优惠金额',
`benefit_discount` DECIMAL(16,2) COMMENT '优惠折扣',
`benefit_level` STRING COMMENT '优惠级别'
) COMMENT '活动规则表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_activity_rule_full/';
DROP TABLE IF EXISTS ods_base_dic_full;
CREATE EXTERNAL TABLE ods_base_dic_full(
`dic_code` STRING COMMENT '编号',
`dic_name` STRING COMMENT '编码名称',
`parent_code` STRING COMMENT '父编码',
`create_time` STRING COMMENT '创建日期',
`operate_time` STRING COMMENT '操作日期'
) COMMENT '编码字典表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_dic_full/';
DROP TABLE IF EXISTS ods_sku_attr_value_full;
CREATE EXTERNAL TABLE ods_sku_attr_value_full(
`id` STRING COMMENT '编号',
`attr_id` STRING COMMENT '平台属性ID',
`value_id` STRING COMMENT '平台属性值ID',
`sku_id` STRING COMMENT '商品ID',
`attr_name` STRING COMMENT '平台属性名称',
`value_name` STRING COMMENT '平台属性值名称'
) COMMENT 'sku平台属性表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_sku_attr_value_full/';
DROP TABLE IF EXISTS ods_sku_sale_attr_value_full;
CREATE EXTERNAL TABLE ods_sku_sale_attr_value_full(
`id` STRING COMMENT '编号',
`sku_id` STRING COMMENT 'sku_id',
`spu_id` STRING COMMENT 'spu_id',
`sale_attr_value_id` STRING COMMENT '销售属性值id',
`sale_attr_id` STRING COMMENT '销售属性id',
`sale_attr_name` STRING COMMENT '销售属性名称',
`sale_attr_value_name` STRING COMMENT '销售属性值名称'
) COMMENT 'sku销售属性名称'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_sku_sale_attr_value_full/';
DROP TABLE IF EXISTS ods_base_region;
CREATE EXTERNAL TABLE ods_base_region (
`id` STRING COMMENT '编号',
`region_name` STRING COMMENT '地区名称'
) COMMENT '地区表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_base_region/';
DROP TABLE IF EXISTS dw_ods.ods_user_info_full;
CREATE EXTERNAL TABLE dw_ods.ods_user_info_full(
`id` STRING COMMENT 'userId',
`login_name` STRING COMMENT '用户名称',
`nick_name` STRING COMMENT '用户昵称',
`passwd` STRING COMMENT '用户密码',
`name` STRING COMMENT '用户姓名',
`phone_num` STRING COMMENT '手机号',
`email` STRING COMMENT '邮箱',
`head_img` STRING COMMENT '头像',
`user_level` STRING COMMENT '用户级别',
`birthday` STRING COMMENT '用户生日',
`gender` STRING COMMENT '性别 M男,F女',
`create_time` STRING COMMENT '创建时间',
`operate_time` STRING COMMENT '修改时间',
`status` STRING COMMENT '状态'
) COMMENT 'SKU商品表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_user_info_full/';
DROP TABLE IF EXISTS dw_ods.ods_order_info_full;
CREATE EXTERNAL TABLE dw_ods.ods_order_info_full(
`id` BIGINT COMMENT '订单号',
`consignee` STRING COMMENT '收货人',
`consignee_tel` STRING COMMENT '收件人电话',
`total_amount` DECIMAL(16,2) COMMENT '总金额',
`order_status` STRING COMMENT '订单状态',
`user_id` BIGINT COMMENT '用户id',
`payment_way` STRING COMMENT '付款方式',
`delivery_address` STRING COMMENT '送货地址',
`order_comment` STRING COMMENT '订单备注',
`out_trade_no` STRING COMMENT '订单交易编号(第三方支付用)',
`trade_body` STRING COMMENT '订单描述(第三方支付用)',
`create_time` STRING COMMENT '创建时间',
`operate_time` STRING COMMENT '操作时间',
`expire_time` STRING COMMENT '失效时间',
`process_status` STRING COMMENT '进度状态',
`tracking_no` STRING COMMENT '物流单编号',
`parent_order_id` BIGINT COMMENT '父订单编号',
`img_url` STRING COMMENT '图片路径',
`province_id` INT COMMENT '地区',
`activity_reduce_amount` DECIMAL(16,2) COMMENT '促销金额',
`coupon_reduce_amount` DECIMAL(16,2) COMMENT '优惠券',
`original_total_amount` DECIMAL(16,2) COMMENT '原价金额',
`feight_fee` DECIMAL(16,2) COMMENT '运费',
`feight_fee_reduce` DECIMAL(16,2) COMMENT '运费减免',
`refundable_time` STRING COMMENT '可退款日期(签收后30天)'
) COMMENT '订单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_order_info_full/';
DROP TABLE IF EXISTS dw_ods.ods_coupon_use_full;
CREATE EXTERNAL TABLE dw_ods.ods_coupon_use_full(
`id` BIGINT COMMENT '编号',
`coupon_id` BIGINT COMMENT '购物券ID',
`user_id` BIGINT COMMENT '用户ID',
`order_id` BIGINT COMMENT '订单ID',
`coupon_status` STRING COMMENT '购物券状态(1:未使用 2:已使用)',
`get_time` STRING COMMENT '获取时间',
`using_time` STRING COMMENT '使用时间',
`used_time` STRING COMMENT '支付时间',
`expire_time` STRING COMMENT '过期时间'
) COMMENT '优惠券领用表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_coupon_use_full/';
DROP TABLE IF EXISTS dw_ods.ods_order_status_log_full;
CREATE EXTERNAL TABLE dw_ods.ods_order_status_log_full(
`id` BIGINT COMMENT '编号',
`order_id` BIGINT COMMENT '订单ID',
`order_status` STRING COMMENT '订单状态',
`operate_time` STRING COMMENT '修改时间'
) COMMENT '订单状态表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_order_status_log_full/';
DROP TABLE IF EXISTS dw_ods.ods_order_detail_full;
CREATE EXTERNAL TABLE dw_ods.ods_order_detail_full(
`id` BIGINT COMMENT '编号',
`order_id` BIGINT COMMENT '订单编号',
`sku_id` BIGINT COMMENT 'sku_id',
`sku_name` STRING COMMENT 'sku名称',
`img_url` STRING COMMENT '图片名称(冗余)',
`order_price` DECIMAL(16,2) COMMENT '购买价格(下单时sku价格)',
`sku_num` STRING COMMENT '购买个数',
`create_time` STRING COMMENT '创建时间',
`source_type` STRING COMMENT '来源类型',
`source_id` BIGINT COMMENT '来源编号',
`split_total_amount` DECIMAL(16,2) COMMENT '分摊最终金额',
`split_activity_amount` DECIMAL(16,2) COMMENT '分摊活动优惠',
`split_coupon_amount` DECIMAL(16,2) COMMENT '摊优惠券优惠'
) COMMENT '订单明细表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_order_detail_full/';
DROP TABLE IF EXISTS dw_ods.ods_payment_info_full;
CREATE EXTERNAL TABLE dw_ods.ods_payment_info_full(
`id` BIGINT COMMENT '编号',
`out_trade_no` STRING COMMENT '对外业务编号',
`order_id` BIGINT COMMENT '订单编号',
`user_id` BIGINT COMMENT '用户id',
`payment_type` STRING COMMENT '支付类型(微信 支付宝)',
`trade_no` STRING COMMENT '交易编号',
`total_amount` DECIMAL(16,2) COMMENT '支付金额',
`subject` STRING COMMENT '交易内容',
`payment_status` STRING COMMENT '支付状态',
`create_time` STRING COMMENT '创建时间',
`callback_time` STRING COMMENT '回调时间',
`callback_content` STRING COMMENT '回调信息'
) COMMENT '支付信息表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_payment_info_full/';
DROP TABLE IF EXISTS dw_ods.ods_comment_info_full;
CREATE EXTERNAL TABLE dw_ods.ods_comment_info_full(
`id` BIGINT COMMENT '编号',
`user_id` BIGINT COMMENT '用户id',
`nick_name` STRING COMMENT '用户昵称',
`head_img` STRING COMMENT 'head_img',
`sku_id` BIGINT COMMENT 'skuid',
`spu_id` BIGINT COMMENT '商品id',
`order_id` BIGINT COMMENT '订单编号',
`appraise` STRING COMMENT '评价 1 好评 2 中评 3 差评',
`comment_txt` STRING COMMENT '评价内容',
`create_time` STRING COMMENT '创建时间',
`operate_time` STRING COMMENT '修改时间'
) COMMENT '商品评论表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_comment_info_full/';
DROP TABLE IF EXISTS dw_ods.ods_order_refund_info_full;
CREATE EXTERNAL TABLE dw_ods.ods_order_refund_info_full(
`id` BIGINT COMMENT '编号',
`user_id` BIGINT COMMENT '用户id',
`order_id` BIGINT COMMENT '订单id',
`sku_id` BIGINT COMMENT 'skuid',
`refund_type` STRING COMMENT '退款类型',
`refund_num` BIGINT COMMENT '退货件数',
`refund_amount` DECIMAL(16,2) COMMENT '退款金额',
`refund_reason_type` STRING COMMENT '原因类型',
`refund_reason_txt` STRING COMMENT '原因内容',
`refund_status` STRING COMMENT '退款状态(0:待审批 1:已退款)',
`create_time` STRING COMMENT '创建时间'
) COMMENT '退单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_order_refund_info_full/';
DROP TABLE IF EXISTS dw_ods.ods_order_detail_activity_full;
CREATE EXTERNAL TABLE dw_ods.ods_order_detail_activity_full(
`id` BIGINT COMMENT '编号',
`order_id` BIGINT COMMENT '订单id',
`order_detail_id` BIGINT COMMENT '订单明细id',
`activity_id` BIGINT COMMENT '活动ID',
`activity_rule_id` BIGINT COMMENT '活动规则',
`sku_id` BIGINT COMMENT 'skuID',
`create_time` STRING COMMENT '获取时间'
) COMMENT '订单详情活动关联表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_order_detail_activity_full/';
DROP TABLE IF EXISTS dw_ods.ods_order_detail_coupon_full;
CREATE EXTERNAL TABLE dw_ods.ods_order_detail_coupon_full(
`id` BIGINT COMMENT '编号',
`order_id` BIGINT COMMENT '订单id',
`order_detail_id` BIGINT COMMENT '订单明细id',
`coupon_id` BIGINT COMMENT '购物券ID',
`coupon_use_id` BIGINT COMMENT '购物券领用id',
`sku_id` BIGINT COMMENT 'skuID',
`create_time` STRING COMMENT '获取时间'
) COMMENT '订单明细购物券表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_order_detail_coupon_full/';
DROP TABLE IF EXISTS dw_ods.ods_refund_payment_full;
CREATE EXTERNAL TABLE dw_ods.ods_refund_payment_full(
`id` BIGINT COMMENT '编号',
`out_trade_no` STRING COMMENT '对外业务编号',
`order_id` BIGINT COMMENT '订单编号',
`sku_id` BIGINT COMMENT 'sku_id',
`payment_type` STRING COMMENT '支付类型(微信 支付宝)',
`trade_no` STRING COMMENT '交易编号',
`total_amount` DECIMAL(16,2) COMMENT '退款金额',
`subject` STRING COMMENT '交易内容',
`refund_status` STRING COMMENT '退款状态',
`create_time` STRING COMMENT '创建时间',
`callback_time` STRING COMMENT '回调时间',
`callback_content` STRING COMMENT '回调信息'
) COMMENT '退款信息表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/dw_ods.db/ods_refund_payment_full/';
2.1.2 业务增量表建表语句
DROP TABLE IF EXISTS ods_order_info_inc;
CREATE EXTERNAL TABLE ods_order_info_inc (
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
consignee :STRING,
consignee_tel :STRING,
total_amount :DECIMAL(16, 2),
order_status :STRING,
user_id:STRING,
payment_way :STRING,
delivery_address :STRING,
order_comment :STRING,
out_trade_no :STRING,
trade_body:STRING,
create_time :STRING,
operate_time :STRING,
expire_time :STRING,
process_status :STRING,
tracking_no:STRING,
parent_order_id :STRING,
img_url :STRING,
province_id :STRING,
activity_reduce_amount:DECIMAL(16, 2),
coupon_reduce_amount :DECIMAL(16, 2),
original_total_amount :DECIMAL(16, 2),
freight_fee:DECIMAL(16, 2),
freight_fee_reduce :DECIMAL(16, 2),
refundable_time :DECIMAL(16, 2)> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_order_info_inc/';
msck repair table ods_order_info_inc;
DROP TABLE IF EXISTS ods_coupon_use_inc;
CREATE EXTERNAL TABLE ods_coupon_use_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
coupon_id :STRING,
user_id :STRING,
order_id :STRING,
coupon_status :STRING,
get_time :STRING,
using_time:STRING,
used_time :STRING,expire_time :STRING,
create_time :STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '优惠券领用表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_coupon_use_inc/';
msck repair table ods_coupon_use_inc;
DROP TABLE IF EXISTS ods_order_status_log_inc;
CREATE EXTERNAL TABLE ods_order_status_log_inc (
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
order_id :STRING,
order_status :STRING,
create_time :STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单状态表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_order_status_log_inc/';
msck repair table ods_order_status_log_inc;
DROP TABLE IF EXISTS ods_user_info_inc;
CREATE EXTERNAL TABLE ods_user_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
login_name :STRING,
nick_name :STRING,
passwd :STRING,
name :STRING,
phone_num :STRING,
email:STRING,
head_img :STRING,
user_level :STRING,
birthday :STRING,
gender :STRING,
create_time :STRING,
operate_time:STRING,
status :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '用户表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_user_info_inc/';
msck repair table ods_user_info_inc;
DROP TABLE IF EXISTS ods_order_detail_inc;
CREATE EXTERNAL TABLE ods_order_detail_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
order_id :STRING,
sku_id :STRING,
sku_name :STRING,
img_url :STRING,
order_price:DECIMAL(16, 2),
sku_num :BIGINT,
create_time :STRING,
source_type :STRING,
source_id :STRING,
split_total_amount:DECIMAL(16, 2),
split_activity_amount :DECIMAL(16, 2),
split_coupon_amount:DECIMAL(16, 2),
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,
STRING> COMMENT '旧值'
) COMMENT '订单详情表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_order_detail_inc/';
msck repair table ods_order_detail_inc;
DROP TABLE IF EXISTS ods_payment_info_inc;
CREATE EXTERNAL TABLE ods_payment_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
out_trade_no :STRING,
order_id :STRING,
user_id :STRING,
payment_type :STRING,
trade_no:STRING,
total_amount :DECIMAL(16, 2),
subject :STRING,
payment_status :STRING,
create_time :STRING,
callback_time:STRING,
callback_content :STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '支付流水表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_payment_info_inc/';
msck repair table ods_payment_info_inc;
DROP TABLE IF EXISTS ods_comment_info_inc;
CREATE EXTERNAL TABLE ods_comment_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
user_id :STRING,
nick_name :STRING,
head_img :STRING,
sku_id :STRING,
spu_id :STRING,
order_id :STRING,
appraise :STRING,
comment_txt :STRING,
create_time :STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '商品评论表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_comment_info_inc/';
msck repair table ods_comment_info_inc;
DROP TABLE IF EXISTS ods_order_refund_info_inc;
CREATE EXTERNAL TABLE ods_order_refund_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
user_id :STRING,
order_id :STRING,
sku_id :STRING,
refund_type :STRING,
refund_num :BIGINT,
refund_amount:DECIMAL(16, 2),
refund_reason_type :STRING,
refund_reason_txt :STRING,
refund_status :STRING,
create_time:STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_order_refund_info_inc/';
msck repair table ods_order_refund_info_inc;
DROP TABLE IF EXISTS ods_order_detail_activity_inc;
CREATE EXTERNAL TABLE ods_order_detail_activity_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
order_id :STRING,
order_detail_id :STRING,
activity_id :STRING,
activity_rule_id :STRING,
sku_id:STRING,
create_time :STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单详情活动关联表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_order_detail_activity_inc/';
msck repair table ods_order_detail_activity_inc;
DROP TABLE IF EXISTS ods_order_detail_coupon_inc;
CREATE EXTERNAL TABLE ods_order_detail_coupon_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
order_id :STRING,
order_detail_id :STRING,
coupon_id :STRING,
coupon_use_id :STRING,
sku_id:STRING,
create_time :STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单详情活动关联表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_order_detail_coupon_inc/';
msck repair table ods_order_detail_coupon_inc;
DROP TABLE IF EXISTS ods_refund_payment_inc;
CREATE EXTERNAL TABLE ods_refund_payment_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
out_trade_no :STRING,
order_id :STRING,
sku_id :STRING,
payment_type :STRING,
trade_no :STRING,
total_amount:DECIMAL(16, 2),
subject :STRING,
refund_status :STRING,
create_time :STRING,
callback_time :STRING,
callback_content:STRING,
operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '支付流水表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_refund_payment_inc/';
msck repair table ods_refund_payment_inc;
CREATE EXTERNAL TABLE IF NOT EXISTS `dw_ods.ods_cart_info_inc` (
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,
user_id :STRING,
sku_id :STRING,
cart_price :DECIMAL(16, 2),
sku_num :BIGINT,
img_url :STRING,
sku_name :STRING,
is_checked :STRING,
create_time :STRING,
operate_time :STRING,
is_ordered :STRING,
order_time:STRING,
source_type:STRING,
source_id:BIGINT> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '购物车增量表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/dw_ods.db/ods_cart_info_inc/';
2.1.3 流量增量表建表语句
DROP TABLE IF EXISTS ods_log_inc;
CREATE EXTERNAL TABLE ods_log_inc (`line` string)
PARTITIONED BY (`dt` string)
LOCATION '/warehouse/dw_ods.db/ods_log_inc/';
2.2 数据装载
省份表和城市表只需首次导入一次,其他业务表需要每天都装载。
2.2.1 初始化装载省份和地区表
[hadoop@hadoop102 ~]$ hive -e "load data inpath '/warehouse/db/gmall/base_province_full/2023-12-02' OVERWRITE into table dw_ods.ods_base_province;"
[hadoop@hadoop102 ~]$ hive -e "load data inpath '/warehouse/db/gmall/base_region_full/2023-12-02' OVERWRITE into table dw_ods.ods_base_region;"
2.2.2 业务数据装载
注意1:要在Sqoop数据同步成功之后运行。
注意2:crontab无法直接处理任务依赖关系!
TODO:如何判定Sqoop同步成功???? 如何在其成功后运行???
(1) 开发脚本
[hadoop@hadoop102 ~]$ cd /home/hadoop/bin/
[hadoop@hadoop102 bin]$ vim hdfs_to_ods_db.sh
脚本内容:
#!/bin/bash
APP=dw_ods
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;then
do_date=$2
else
do_date=`date -d "-1 day" +%F`
fi
sku_info_full="load data inpath '/warehouse/db/gmall/sku_info_full/$do_date' OVERWRITE into table ${APP}.ods_sku_info_full partition(dt='$do_date');"
base_category1_full="load data inpath '/warehouse/db/gmall/base_category1_full/$do_date' OVERWRITE into table ${APP}.ods_base_category1_full partition(dt='$do_date');"
base_category2_full="load data inpath '/warehouse/db/gmall/base_category2_full/$do_date' OVERWRITE into table ${APP}.ods_base_category2_full partition(dt='$do_date');"
base_category3_full="load data inpath '/warehouse/db/gmall/base_category3_full/$do_date' OVERWRITE into table ${APP}.ods_base_category3_full partition(dt='$do_date');"
base_trademark_full="load data inpath '/warehouse/db/gmall/base_trademark_full/$do_date' OVERWRITE into table ${APP}.ods_base_trademark_full partition(dt='$do_date');"
spu_info_full="load data inpath '/warehouse/db/gmall/spu_info_full/$do_date' OVERWRITE into table ${APP}.ods_spu_info_full partition(dt='$do_date');"
favor_info_full="load data inpath '/warehouse/db/gmall/favor_info_full/$do_date' OVERWRITE into table ${APP}.ods_favor_info_full partition(dt='$do_date'); "
cart_info_full="load data inpath '/warehouse/db/gmall/cart_info_full/$do_date' OVERWRITE into table ${APP}.ods_cart_info_full partition(dt='$do_date'); "
coupon_info_full="load data inpath '/warehouse/db/gmall/coupon_info_full/$do_date' OVERWRITE into table ${APP}.ods_coupon_info_full partition(dt='$do_date'); "
activity_info_full="load data inpath '/warehouse/db/gmall/activity_info_full/$do_date' OVERWRITE into table ${APP}.ods_activity_info_full partition(dt='$do_date'); "
activity_rule_full="load data inpath '/warehouse/db/gmall/activity_rule_full/$do_date' OVERWRITE into table ${APP}.ods_activity_rule_full partition(dt='$do_date'); "
base_dic_full="load data inpath '/warehouse/db/gmall/base_dic_full/$do_date' OVERWRITE into table ${APP}.ods_base_dic_full partition(dt='$do_date'); "
sku_attr_value_full="load data inpath '/warehouse/db/gmall/sku_attr_value_full/$do_date' OVERWRITE into table ${APP}.ods_sku_attr_value_full partition(dt='$do_date'); "
sku_sale_attr_value_full="load data inpath '/warehouse/db/gmall/sku_sale_attr_value_full/$do_date' OVERWRITE into table ${APP}.ods_sku_sale_attr_value_full partition(dt='$do_date'); "
user_info_full="load data inpath '/warehouse/db/gmall/user_info_full/$do_date' OVERWRITE into table ${APP}.ods_user_info_full partition(dt='$do_date');"
order_info_full="load data inpath '/warehouse/db/gmall/order_info_full/$do_date' OVERWRITE into table ${APP}.ods_order_info_full partition(dt='$do_date');"
coupon_use_full="load data inpath '/warehouse/db/gmall/coupon_use_full/$do_date' OVERWRITE into table ${APP}.ods_coupon_use_full partition(dt='$do_date');"
order_status_log_full="load data inpath '/warehouse/db/gmall/order_status_log_full/$do_date' OVERWRITE into table ${APP}.ods_order_status_log_full partition(dt='$do_date');"
order_detail_full="load data inpath '/warehouse/db/gmall/order_detail_full/$do_date' OVERWRITE into table ${APP}.ods_order_detail_full partition(dt='$do_date');"
payment_info_full="load data inpath '/warehouse/db/gmall/payment_info_full/$do_date' OVERWRITE into table ${APP}.ods_payment_info_full partition(dt='$do_date');"
comment_info_full="load data inpath '/warehouse/db/gmall/comment_info_full/$do_date' OVERWRITE into table ${APP}.ods_comment_info_full partition(dt='$do_date');"
order_refund_info_full="load data inpath '/warehouse/db/gmall/order_refund_info_full/$do_date' OVERWRITE into table ${APP}.ods_order_refund_info_full partition(dt='$do_date');"
order_detail_activity_full="load data inpath '/warehouse/db/gmall/order_detail_activity_full/$do_date' OVERWRITE into table ${APP}.ods_order_detail_activity_full partition(dt='$do_date');"
order_detail_coupon_full="load data inpath '/warehouse/db/gmall/order_detail_coupon_full/$do_date' OVERWRITE into table ${APP}.ods_order_detail_coupon_full partition(dt='$do_date');"
refund_payment_full="load data inpath '/warehouse/db/gmall/refund_payment_full/$do_date' OVERWRITE into table ${APP}.ods_refund_payment_full partition(dt='$do_date');"
order_info_inc="load data inpath '/warehouse/db/gmall/order_info_inc/$do_date' OVERWRITE into table ${APP}.ods_order_info_inc partition(dt='$do_date'); "
coupon_use_inc="load data inpath '/warehouse/db/gmall/coupon_use_inc/$do_date' OVERWRITE into table ${APP}.ods_coupon_use_inc partition(dt='$do_date'); "
order_status_log_inc="load data inpath '/warehouse/db/gmall/order_status_log_inc/$do_date' OVERWRITE into table ${APP}.ods_order_status_log_inc partition(dt='$do_date');"
user_info_inc="load data inpath '/warehouse/db/gmall/user_info_inc/$do_date' OVERWRITE into table ${APP}.ods_user_info_inc partition(dt='$do_date'); "
order_detail_inc="load data inpath '/warehouse/db/gmall/order_detail_inc/$do_date' OVERWRITE into table ${APP}.ods_order_detail_inc partition(dt='$do_date'); "
payment_info_inc="load data inpath '/warehouse/db/gmall/payment_info_inc/$do_date' OVERWRITE into table ${APP}.ods_payment_info_inc partition(dt='$do_date'); "
comment_info_inc="load data inpath '/warehouse/db/gmall/comment_info_inc/$do_date' OVERWRITE into table ${APP}.ods_comment_info_inc partition(dt='$do_date'); "
order_refund_info_inc="load data inpath '/warehouse/db/gmall/order_refund_info_inc/$do_date' OVERWRITE into table ${APP}.ods_order_refund_info_inc partition(dt='$do_date'); "
order_detail_activity_inc="load data inpath '/warehouse/db/gmall/order_detail_activity_inc/$do_date' OVERWRITE into table ${APP}.ods_order_detail_activity_inc partition(dt='$do_date'); "
order_detail_coupon_inc="load data inpath '/warehouse/db/gmall/order_detail_coupon_inc/$do_date' OVERWRITE into table ${APP}.ods_order_detail_coupon_inc partition(dt='$do_date');"
refund_payment_inc="load data inpath '/warehouse/db/gmall/refund_payment_inc/$do_date' OVERWRITE into table ${APP}.ods_refund_payment_inc partition(dt='$do_date');"
cart_info_inc="load data inpath '/warehouse/db/gmall/cart_info_inc/$do_date' OVERWRITE into table ${APP}.ods_cart_info_inc partition(dt='$do_date');"
case $1 in
"sku_info_full"){
hive -e "$sku_info_full"
};;
"base_category1_full"){
hive -e "$base_category1_full"
};;
"base_category2_full"){
hive -e "$base_category2_full"
};;
"base_category3_full"){
hive -e "$base_category3_full"
};;
"base_trademark_full"){
hive -e "$base_trademark_full"
};;
"spu_info_full"){
hive -e "$spu_info_full"
};;
"favor_info_full"){
hive -e "$favor_info_full"
};;
"cart_info_full"){
hive -e "$cart_info_full"
};;
"coupon_info_full"){
hive -e "$coupon_info_full"
};;
"activity_info_full"){
hive -e "$activity_info_full"
};;
"activity_rule_full"){
hive -e "$activity_rule_full"
};;
"base_dic_full"){
hive -e "$base_dic_full"
};;
"sku_attr_value_full"){
hive -e "$sku_attr_value_full"
};;
"sku_sale_attr_value_full"){
hive -e "$sku_sale_attr_value_full"
};;
"user_info_full"){
hive -e "$user_info_full"
};;
"order_info_full"){
hive -e "$order_info_full"
};;
"coupon_use_full"){
hive -e "$coupon_use_full"
};;
"order_status_log_full"){
hive -e "$order_status_log_full"
};;
"order_detail_full"){
hive -e "$order_detail_full"
};;
"payment_info_full"){
hive -e "$payment_info_full"
};;
"comment_info_full"){
hive -e "$comment_info_full"
};;
"order_refund_info_full"){
hive -e "$order_refund_info_full"
};;
"order_detail_activity_full"){
hive -e "$order_detail_activity_full"
};;
"order_detail_coupon_full"){
hive -e "$order_detail_coupon_full"
};;
"refund_payment_full"){
hive -e "$refund_payment_full"
};;
"order_info_inc"){
hive -e "$order_info_inc"
};;
"coupon_use_inc"){
hive -e "$coupon_use_inc"
};;
"order_status_log_inc"){
hive -e "$order_status_log_inc"
};;
"user_info_inc"){
hive -e "$user_info_inc"
};;
"order_detail_inc"){
hive -e "$order_detail_inc"
};;
"payment_info_inc"){
hive -e "$payment_info_inc"
};;
"comment_info_inc"){
hive -e "$comment_info_inc"
};;
"order_refund_info_inc"){
hive -e "$order_refund_info_inc"
};;
"order_detail_activity_inc"){
hive -e "$order_detail_activity_inc"
};;
"order_detail_coupon_inc"){
hive -e "$order_detail_coupon_inc"
};;
"refund_payment_inc"){
hive -e "$refund_payment_inc"
};;
"cart_info_inc"){
hive -e "$cart_info_inc"
};;
"all"){
hive -e "$sku_info_full$base_category1_full$base_category2_full$base_category3_full$base_trademark_full$spu_info_full$favor_info_full$cart_info_full$coupon_info_full$activity_info_full$activity_rule_full$base_dic_full$sku_attr_value_full$sku_sale_attr_value_full$user_info_full$order_info_full$coupon_use_full$order_status_log_full$order_detail_full$payment_info_full$comment_info_full$order_refund_info_full$order_detail_activity_full$order_detail_coupon_full$refund_payment_full$order_info_inc$coupon_use_inc$order_status_log_inc$user_info_inc$order_detail_inc$payment_info_inc$comment_info_inc$order_refund_info_inc$order_detail_activity_inc$order_detail_coupon_inc$refund_payment_inc$cart_info_inc"
};;
esac
(2) 授予脚本执行权限
[hadoop@hadoop102 bin]$ chmod +x hdfs_to_ods_db.sh
(3) 定时调度
每天凌晨3点装载业务数据
[hadoop@hadoop102 ~]$ crontab -e
新增内容:
# 每天凌晨3点装载业务数据
0 3 * * * hdfs_to_ods_db.sh all
2.2.3 日志数据装载
(1) 开发脚本
[hadoop@hadoop102 ~]$ cd /home/hadoop/bin/
[hadoop@hadoop102 bin]$ vim hdfs_to_ods_log.sh
脚本内容:
#!/bin/bash
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
echo ================== 日志日期为 $do_date ==================
sql="
load data inpath '/warehouse/applog/gmall/tracking_log/$do_date' into table dw_ods.ods_log_inc partition(dt='$do_date');
"
hive -e "$sql"
(2) 授予脚本执行权限
[hadoop@hadoop102 bin]$ chmod +x hdfs_to_ods_log.sh
(3) 定时调度
凌晨3点执行,要等昨天的日志数据同步完成。
关键是保证白天时数仓能查到昨天分区的数据。
[hadoop@hadoop102 ~]$ crontab -e
新增内容:
# 每天凌晨3点装载用户日志数据增量表
0 3 * * * hdfs_to_ods_log.sh