問題描述:
ods層數據量過大,實現先按月分區,再在按月分區的基礎山,按照天進行分區,可以每次同步三個月的數據,利用shell腳本實現
具體的做法:
1.整體導入
1.1建表
drop table if exists ods_tb_readonly_invoice_data_full;
create EXTERNAL table ods_tb_readonly_invoice_data_full
(
vbeln string comment '發票號碼',
sonum string comment 'so單號',
xblnr string comment '参考凭证编号',
dnnum string comment 'DN單號',
posnr string comment '销售和分销凭证的项目号',
knumv string comment '单据条件数',
fkart string comment 'F2:正常出货;RE: 退货;L2: Debit Memo (額外多收客人钱);G2 : Credit Memo (退钱给客人);S1: 取消出货S2: 取消Credit Memo',
FKDAT string comment '發票日期',
erdat string comment '創建日期',
kunrg string comment '付款方',
kunag string comment 'Sold-to party送达方',
dname string comment '司机名',
truck string comment '货车',
dstop string comment '站数',
zterm string comment '支付条款',
invamt decimal(15, 2) comment '凭证货币计量的净价值',
invcost decimal(15, 2) comment '凭证货币计量的净价值',
matnr string comment '物料號',
arktx string comment '物料描述',
matkl string comment 'Material Group',
ekgrp string comment '採購組',
prodh string comment 'Product hierarchy',
fkimg decimal(13, 3) comment '出貨數量',
vrkme string comment '销售单位(NIO备注:对于系统来说这是销售单位,但对于sales来说基本单位才是卖给客人的销售单位)',
umvkz bigint comment '销售数量转换成SKU的分子(因子)',
umvkn bigint comment '销售数量转换为 SKU 的值(除数)',
meins string comment '基本单位',
ntgew decimal(13, 3) comment '磅數',
brgew decimal(13, 3) comment '毛重',
gewei string comment '淨重',
iamt decimal(15, 2) comment '客戶售價',
imap decimal(15, 2) comment '成本價',
icost decimal(15, 2) comment '給SALES的價格',
idisc decimal(15, 2) comment '凭证货币计量的净价值',
itax decimal(13, 2) comment '凭证货币税额',
werks string comment '工廠',
vkgrp string comment '銷售組',
vkbur string comment '銷售部門',
salesgr string comment '對應的Sales',
salesoff string comment '對應的Sales的直屬上級',
zzshiptype string comment 'Ship Type',
aupos string comment '销售和分销凭证的项目号',
kdkg1 string comment 'Reservation Indicator',
auart string comment 'SO訂單類型:ZIS :銷售合同出貨',
pstyv string comment 'TANN為FREE標記',
zwintme01 string comment '送货时间1',
zwintme02 string comment '送货时间2',
zwintme03 string comment '送货时间3',
zwintme04 string comment '送货时间4',
vsbed string comment 'Shipping Conditions',
city1 string comment 'Ship-to party City',
invamtitax decimal(15, 2) comment 'Amount with tax(含税的订单总价钱)'
)
comment '接口時按發票創建日期(ERDAT)查詢數據'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/db_core/ods/ods_tb_readonly_invoice_data_full/';
1.2整體數據導入
-- =============================數據加載===================================================================
load data inpath '/tb_readonly_invoice_data_full/2023-02-13' into table ods_tb_readonly_invoice_data_full partition(dt='2023-02-13');
- 按月分區
2.1 建表
drop table if exists db_core.ods_tb_readonly_invoice_data_inc_month ;
create EXTERNAL table ods_tb_readonly_invoice_data_inc_month
(
vbeln string comment '發票號碼',
sonum string comment 'so單號',
xblnr string comment '参考凭证编号',
dnnum string comment 'DN單號',
posnr string comment '销售和分销凭证的项目号',
knumv string comment '单据条件数',
fkart string comment 'F2:正常出货;RE: 退货;L2: Debit Memo (額外多收客人钱);G2 : Credit Memo (退钱给客人);S1: 取消出货S2: 取消Credit Memo',
FKDAT string comment '發票日期',
erdat string comment '創建日期',
kunrg string comment '付款方',
kunag string comment 'Sold-to party送达方',
dname string comment '司机名',
truck string comment '货车',
dstop string comment '站数',
zterm string comment '支付条款',
invamt decimal(15, 2) comment '凭证货币计量的净价值',
invcost decimal(15, 2) comment '凭证货币计量的净价值',
matnr string comment '物料號',
arktx string comment '物料描述',
matkl string comment 'Material Group',
ekgrp string comment '採購組',
prodh string comment 'Product hierarchy',
fkimg decimal(13, 3) comment '出貨數量',
vrkme string comment '销售单位(NIO备注:对于系统来说这是销售单位,但对于sales来说基本单位才是卖给客人的销售单位)',
umvkz bigint comment '销售数量转换成SKU的分子(因子)',
umvkn bigint comment '销售数量转换为 SKU 的值(除数)',
meins string comment '基本单位',
ntgew decimal(13, 3) comment '磅數',
brgew decimal(13, 3) comment '毛重',
gewei string comment '淨重',
iamt decimal(15, 2) comment '客戶售價',
imap decimal(15, 2) comment '成本價',
icost decimal(15, 2) comment '給SALES的價格',
idisc decimal(15, 2) comment '凭证货币计量的净价值',
itax decimal(13, 2) comment '凭证货币税额',
werks string comment '工廠',
vkgrp string comment '銷售組',
vkbur string comment '銷售部門',
salesgr string comment '對應的Sales',
salesoff string comment '對應的Sales的直屬上級',
zzshiptype string comment 'Ship Type',
aupos string comment '销售和分销凭证的项目号',
kdkg1 string comment 'Reservation Indicator',
auart string comment 'SO訂單類型:ZIS :銷售合同出貨',
pstyv string comment 'TANN為FREE標記',
zwintme01 string comment '送货时间1',
zwintme02 string comment '送货时间2',
zwintme03 string comment '送货时间3',
zwintme04 string comment '送货时间4',
vsbed string comment 'Shipping Conditions',
city1 string comment 'Ship-to party City',
invamtitax decimal(15, 2) comment 'Amount with tax(含税的订单总价钱)'
)
comment '接口時按發票創建日期(ERDAT)查詢數據'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/db_core/ods/ods_tb_readonly_invoice_data_inc_month/';
1.2插入數據
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode= 2000;
insert overwrite table ods_tb_readonly_invoice_data_inc_month partition (dt)
select vbeln,
sonum,
xblnr,
dnnum,
posnr,
knumv,
fkart,
fkdat,
erdat,
kunrg,
kunag,
dname,
truck,
dstop,
zterm,
invamt,
invcost,
matnr,
arktx,
matkl,
ekgrp,
prodh,
fkimg,
vrkme,
umvkz,
umvkn,
meins,
ntgew,
brgew,
gewei,
iamt,
imap,
icost,
idisc,
itax,
werks,
vkgrp,
vkbur,
salesgr,
salesoff,
zzshiptype,
aupos,
kdkg1,
auart,
pstyv,
zwintme01,
zwintme02,
zwintme03,
zwintme04,
vsbed,
city1,
invamtitax,
date_format(fkdat,'yyyy-MM') dt
from ods_tb_readonly_invoice_data_full
3 按照三個月的範圍,按照日期每日導入數據
3.1建表
drop table if exists ods_tb_readonly_invoice_data_day;
create EXTERNAL table ods_tb_readonly_invoice_data_day
(
vbeln string comment '發票號碼',
sonum string comment 'so單號',
xblnr string comment '参考凭证编号',
dnnum string comment 'DN單號',
posnr string comment '销售和分销凭证的项目号',
knumv string comment '单据条件数',
fkart string comment 'F2:正常出货;RE: 退货;L2: Debit Memo (額外多收客人钱);G2 : Credit Memo (退钱给客人);S1: 取消出货S2: 取消Credit Memo',
erdat string comment '創建日期',
kunrg string comment '付款方',
kunag string comment 'Sold-to party送达方',
dname string comment '司机名',
truck string comment '货车',
dstop string comment '站数',
zterm string comment '支付条款',
invamt decimal(15, 2) comment '凭证货币计量的净价值',
invcost decimal(15, 2) comment '凭证货币计量的净价值',
matnr string comment '物料號',
arktx string comment '物料描述',
matkl string comment 'Material Group',
ekgrp string comment '採購組',
prodh string comment 'Product hierarchy',
fkimg decimal(13, 3) comment '出貨數量',
vrkme string comment '销售单位(NIO备注:对于系统来说这是销售单位,但对于sales来说基本单位才是卖给客人的销售单位)',
umvkz bigint comment '销售数量转换成SKU的分子(因子)',
umvkn bigint comment '销售数量转换为 SKU 的值(除数)',
meins string comment '基本单位',
ntgew decimal(13, 3) comment '磅數',
brgew decimal(13, 3) comment '毛重',
gewei string comment '淨重',
iamt decimal(15, 2) comment '客戶售價',
imap decimal(15, 2) comment '成本價',
icost decimal(15, 2) comment '給SALES的價格',
idisc decimal(15, 2) comment '凭证货币计量的净价值',
itax decimal(13, 2) comment '凭证货币税额',
werks string comment '工廠',
vkgrp string comment '銷售組',
vkbur string comment '銷售部門',
salesgr string comment '對應的Sales',
salesoff string comment '對應的Sales的直屬上級',
zzshiptype string comment 'Ship Type',
aupos string comment '销售和分销凭证的项目号',
kdkg1 string comment 'Reservation Indicator',
auart string comment 'SO訂單類型:ZIS :銷售合同出貨',
pstyv string comment 'TANN為FREE標記',
zwintme01 string comment '送货时间1',
zwintme02 string comment '送货时间2',
zwintme03 string comment '送货时间3',
zwintme04 string comment '送货时间4',
vsbed string comment 'Shipping Conditions',
city1 string comment 'Ship-to party City',
invamtitax decimal(15, 2) comment 'Amount with tax(含税的订单总价钱)'
)
comment '接口時按發票創建日期(ERDAT)查詢數據'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/db_core/ods/ods_tb_readonly_invoice_data_day/';
3.2 數據加載
use db_core;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode=2000;
insert overwrite table ods_tb_readonly_invoice_data_day partition(dt)
select
vbeln,
sonum,
xblnr,
dnnum,
posnr,
knumv,
fkart,
erdat,
kunrg,
kunag,
dname,
truck,
dstop,
zterm,
invamt,
invcost,
matnr,
arktx,
matkl,
ekgrp,
prodh,
fkimg,
vrkme,
umvkz,
umvkn,
meins,
ntgew,
brgew,
gewei,
iamt,
imap,
icost,
idisc,
itax,
werks,
vkgrp,
vkbur,
salesgr,
salesoff,
zzshiptype,
aupos,
kdkg1,
auart,
pstyv,
zwintme01,
zwintme02,
zwintme03,
zwintme04,
vsbed,
city1,
invamtitax,
FKDAT
from ods_tb_readonly_invoice_data_inc_month
where FKDAT>='2015-11-30' and FKDAT<='2016-01-01';
3.3 每個三個月輸出的腳本
#!/bin/bash
start_date="2015-11-30"
end_date="2016-12-31"
start_month=`date -d "$start_date" +%Y-%m`
end_month=`date -d "$end_date" +%Y-%m`
diff_months=$(( ($(date -d $end_date +%s) - $(date -d $start_date +%s)) / (30*24*3600) ))
if [ $diff_months -gt 3 ];
then
while [ "$start_month" != "$end_month" ]
do
start_day=`date -d "$start_date" +%Y-%m-%d`
end_day=`date -d "$(date -d "$start_month-01 +3 months -1 day")" +%Y-%m-%d`
if [ `date -d "$end_day" +%Y-%m` \> "$end_month" ]; then
end_day=`date -d "$end_month-01 +1 month -1 day" +%Y-%m-%d`
fi
start_date=`date -d "$(date -d "$start_month-01 +1 month")" +%Y-%m-%d`
if [ `date -d "$start_date" +%Y-%m` \> "$end_month" ]; then
start_month="$end_month"
else
start_month=`date -d "$start_date" +%Y-%m`
fi
echo "start_day:$start_day,end_day:$end_day"
done
else
start_day=`date -d "$start_date" +%Y-%m-%d`
end_day=`date -d "$end_date" +%Y-%m-%d`
echo "start_day:$start_day,end_day:$end_day"
fi
效果:
3.3自動化腳本
#!/bin/bash
start_date=$1
end_date=$2
start_month=`date -d "$start_date" +%Y-%m`
end_month=`date -d "$end_date" +%Y-%m`
diff_months=$(( ($(date -d $end_date +%s) - $(date -d $start_date +%s)) / (30*24*3600) ))
if [ $diff_months -gt 3 ];
then
while [ "$start_month" != "$end_month" ]
do
start_day=`date -d "$start_date" +%Y-%m-%d`
end_day=`date -d "$(date -d "$start_month-01 +3 months -1 day")" +%Y-%m-%d`
if [ `date -d "$end_day" +%Y-%m` \> "$end_month" ]; then
end_day=`date -d "$end_month-01 +1 month -1 day" +%Y-%m-%d`
fi
start_date=`date -d "$(date -d "$start_month-01 +1 month")" +%Y-%m-%d`
if [ `date -d "$start_date" +%Y-%m` \> "$end_month" ]; then
start_month="$end_month"
else
start_month=`date -d "$start_date" +%Y-%m`
fi
echo "start_day:$start_day,end_day:$end_day"
/opt/module/hive/bin/hive -e "use db_core;set hive.exec.dynamic.partition.mode=nonstrict;set hive.exec.max.dynamic.partitions=10000;set hive.exec.max.dynamic.partitions.pernode=2000;
insert overwrite table db_core.ods_tb_readonly_invoice_data_day partition(dt)
select
vbeln,
sonum,
xblnr,
dnnum,
posnr,
knumv,
fkart,
erdat,
kunrg,
kunag,
dname,
truck,
dstop,
zterm,
invamt,
invcost,
matnr,
arktx,
matkl,
ekgrp,
prodh,
fkimg,
vrkme,
umvkz,
umvkn,
meins,
ntgew,
brgew,
gewei,
iamt,
imap,
icost,
idisc,
itax,
werks,
vkgrp,
vkbur,
salesgr,
salesoff,
zzshiptype,
aupos,
kdkg1,
auart,
pstyv,
zwintme01,
zwintme02,
zwintme03,
zwintme04,
vsbed,
city1,
invamtitax,
fkdat
from db_core.ods_tb_readonly_invoice_data_inc_month
where fkdat>='$start_day' and fkdat<='$end_day';"
done
else
start_day=`date -d "$start_date" +%Y-%m-%d`
end_day=`date -d "$end_date" +%Y-%m-%d`
echo "start_day:$start_day,end_day:$end_day"
/opt/module/hive/bin/hive -e "use db_core;set hive.exec.dynamic.partition.mode=nonstrict;set hive.exec.max.dynamic.partitions=10000;set hive.exec.max.dynamic.partitions.pernode=2000;
insert overwrite table db_core.ods_tb_readonly_invoice_data_day partition(dt)
select
vbeln,
sonum,
xblnr,
dnnum,
posnr,
knumv,
fkart,
erdat,
kunrg,
kunag,
dname,
truck,
dstop,
zterm,
invamt,
invcost,
matnr,
arktx,
matkl,
ekgrp,
prodh,
fkimg,
vrkme,
umvkz,
umvkn,
meins,
ntgew,
brgew,
gewei,
iamt,
imap,
icost,
idisc,
itax,
werks,
vkgrp,
vkbur,
salesgr,
salesoff,
zzshiptype,
aupos,
kdkg1,
auart,
pstyv,
zwintme01,
zwintme02,
zwintme03,
zwintme04,
vsbed,
city1,
invamtitax,
fkdat
from db_core.ods_tb_readonly_invoice_data_inc_month
where fkdat>='$start_day' and fkdat<='$end_day';"
fi
效果:
總結:
對於很大的表數據,可以一開始先按月進行分區,按月分區之後,再在按月分區的基礎上,按照天進行分區,可以每次同步三個月的數據,比每次同步整個時間週期的效果要好很多