1 业务数据
业务数据一般存入mysql中,过程较为单一简单,不再赘述
2 业务数据导入数仓
2.1 表的同步策略
序号 | 表名 | 解释 | 同步策略 |
1 | order_info | 订单表 | 新增及变化 |
2 | order_detail | 订单详情 | 增量 |
3 | sku_info | 商品表 | 全量 |
4 | user_info | 用户表 | 全量 |
5 | base_category1 | 商品一级分类表 | 全量 |
6 | base_category2 | 商品二级分类表 | 全量 |
7 | base_category3 | 商品三级分类表 | 全量 |
8 | payment_info | 支付流水表 | 增量 |
2.2 Sqoop导入脚本
#!/bin/bash
db_date=$2
echo ${db_date}
db_name=gmall
import_data(){
/opt/module/sqoop/bin/sqoop import \
--connect jdbc:mysql://hadoop:3306/${db_name} \
--username root \
--password 000000 \
--target-dir /origin_data/${db_name}/db/$1/${db_date} \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--query "$2"' and ${conditions};'
}
import_sku_info(){
import_data "sku_info" "
SELECT
id,
spu_id,
price,
sku_name,
sku_desc,
weight,
tm_id,
category3_id,
create_time
FROM sku_info
WHERE 1=1 #全量导入,恒等,防止sql注入
"
}
import_user_info(){
import_data "user_info" "
SELECT
id,
name,
birthday,
gender,
email,
user_level,
create_time
FROM user_info
WHERE 1=1
"
}
import_base_category1(){
import_data "base_category1" "
SELECT
id,
name
FROM base_category1
WHERE 1=1
"
}
import_base_category2(){
import_data "base_category2" "
SELECT
id,
name
FROM base_category2
WHERE 1=1
"
}
import_base_category3(){
import_data "base_category3" "
SELECT
id,
name
FROM base_category3
WHERE 1=1
"
}
import_order_detail(){
import_data "order_detail" "
SELECT
od.id,
order_id,
user_id,
sku_id,
sku_name,
order_price,
sku_num,
o.create_time
FROM order_detail
WHERE
o.id=od.order_id
AND DATE_FORMAT(create_time,'%Y-%m-%d')='${db_date}'
"
}
import_payment_info(){
import_data "payment_info" "
SELECT
id,
out_trade_no,
order_id,
user_id,
alipay_trade_no,
total_amount,
subject,
payment_type,
payment_time
FROM payment_info
WHERE DATE_FORMAT(create_time,'%Y-%m-%d')='${db_date}'
"
}
import_order_info(){
import_data "order_info" "
SELECT
id,
total_amount,
order_status,
user_id,
payment_way,
out_trade_no,
create_time,
operate_tome
FROM order_info
WHERE
DATE_FORMAT(create_time,'%Y-%m-%d')='${db_date}'
OR DATE_FORMAT(operate_tome,'%Y-%m-%d')='${db_date}'
"
}
case $l in
"base_category1")
import_base_category1
;;
"base_category2")
import_base_category2
;;
"base_category3")
import_base_category3
;;
"order_info")
import_order_info
;;
"order_detail")
import_order_detail
;;
"sku_info")
import_sku_info
;;
"user_info")
import_user_info
;;
"paryment_info")
import_paryment_info
;;
"order_info")
import_order_info
;;
"all")
import_base_catego