ods層數據量過大,實現先按月分區,再在按月分區的基礎山,按照天進行分區,可以每次同步三個月的數據,利用shell腳本實現

20 篇文章 0 订阅

問題描述:
ods層數據量過大,實現先按月分區,再在按月分區的基礎山,按照天進行分區,可以每次同步三個月的數據,利用shell腳本實現
具體的做法:

1.整體導入
1.1建表

drop table if exists ods_tb_readonly_invoice_data_full;
create EXTERNAL table ods_tb_readonly_invoice_data_full
(
    vbeln      string comment '發票號碼',
    sonum      string comment 'so單號',
    xblnr      string comment '参考凭证编号',
    dnnum      string comment 'DN單號',
    posnr      string comment '销售和分销凭证的项目号',
    knumv      string comment '单据条件数',
    fkart      string comment 'F2:正常出货;RE: 退货;L2: Debit Memo (額外多收客人钱);G2 : Credit Memo (退钱给客人);S1: 取消出货S2: 取消Credit Memo',
    FKDAT      string comment '發票日期',
    erdat      string comment '創建日期',
    kunrg      string comment '付款方',
    kunag      string comment 'Sold-to party送达方',
    dname      string comment '司机名',
    truck      string comment '货车',
    dstop      string comment '站数',
    zterm      string comment '支付条款',
    invamt     decimal(15, 2) comment '凭证货币计量的净价值',
    invcost    decimal(15, 2) comment '凭证货币计量的净价值',
    matnr      string comment '物料號',
    arktx      string comment '物料描述',
    matkl      string comment 'Material Group',
    ekgrp      string comment '採購組',
    prodh      string comment 'Product hierarchy',
    fkimg      decimal(13, 3) comment '出貨數量',
    vrkme      string comment '销售单位(NIO备注:对于系统来说这是销售单位,但对于sales来说基本单位才是卖给客人的销售单位)',
    umvkz      bigint comment '销售数量转换成SKU的分子(因子)',
    umvkn      bigint comment '销售数量转换为 SKU 的值(除数)',
    meins      string comment '基本单位',
    ntgew      decimal(13, 3) comment '磅數',
    brgew      decimal(13, 3) comment '毛重',
    gewei      string comment '淨重',
    iamt       decimal(15, 2) comment '客戶售價',
    imap       decimal(15, 2) comment '成本價',
    icost      decimal(15, 2) comment '給SALES的價格',
    idisc      decimal(15, 2) comment '凭证货币计量的净价值',
    itax       decimal(13, 2) comment '凭证货币税额',
    werks      string comment '工廠',
    vkgrp      string comment '銷售組',
    vkbur      string comment '銷售部門',
    salesgr    string comment '對應的Sales',
    salesoff   string comment '對應的Sales的直屬上級',
    zzshiptype string comment 'Ship Type',
    aupos      string comment '销售和分销凭证的项目号',
    kdkg1      string comment 'Reservation Indicator',
    auart      string comment 'SO訂單類型:ZIS :銷售合同出貨',
    pstyv      string comment 'TANN為FREE標記',
    zwintme01  string comment '送货时间1',
    zwintme02  string comment '送货时间2',
    zwintme03  string comment '送货时间3',
    zwintme04  string comment '送货时间4',
    vsbed      string comment 'Shipping Conditions',
    city1      string comment 'Ship-to party City',
    invamtitax decimal(15, 2) comment 'Amount with tax(含税的订单总价钱)'
)
    comment '接口時按發票創建日期(ERDAT)查詢數據'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/db_core/ods/ods_tb_readonly_invoice_data_full/';

1.2整體數據導入

-- =============================數據加載===================================================================
load data inpath '/tb_readonly_invoice_data_full/2023-02-13' into table ods_tb_readonly_invoice_data_full partition(dt='2023-02-13');
  1. 按月分區
    2.1 建表
drop table if exists db_core.ods_tb_readonly_invoice_data_inc_month ;
create EXTERNAL table ods_tb_readonly_invoice_data_inc_month
(
    vbeln      string comment '發票號碼',
    sonum      string comment 'so單號',
    xblnr      string comment '参考凭证编号',
    dnnum      string comment 'DN單號',
    posnr      string comment '销售和分销凭证的项目号',
    knumv      string comment '单据条件数',
    fkart      string comment 'F2:正常出货;RE: 退货;L2: Debit Memo (額外多收客人钱);G2 : Credit Memo (退钱给客人);S1: 取消出货S2: 取消Credit Memo',
    FKDAT      string comment '發票日期',
    erdat      string comment '創建日期',
    kunrg      string comment '付款方',
    kunag      string comment 'Sold-to party送达方',
    dname      string comment '司机名',
    truck      string comment '货车',
    dstop      string comment '站数',
    zterm      string comment '支付条款',
    invamt     decimal(15, 2) comment '凭证货币计量的净价值',
    invcost    decimal(15, 2) comment '凭证货币计量的净价值',
    matnr      string comment '物料號',
    arktx      string comment '物料描述',
    matkl      string comment 'Material Group',
    ekgrp      string comment '採購組',
    prodh      string comment 'Product hierarchy',
    fkimg      decimal(13, 3) comment '出貨數量',
    vrkme      string comment '销售单位(NIO备注:对于系统来说这是销售单位,但对于sales来说基本单位才是卖给客人的销售单位)',
    umvkz      bigint comment '销售数量转换成SKU的分子(因子)',
    umvkn      bigint comment '销售数量转换为 SKU 的值(除数)',
    meins      string comment '基本单位',
    ntgew      decimal(13, 3) comment '磅數',
    brgew      decimal(13, 3) comment '毛重',
    gewei      string comment '淨重',
    iamt       decimal(15, 2) comment '客戶售價',
    imap       decimal(15, 2) comment '成本價',
    icost      decimal(15, 2) comment '給SALES的價格',
    idisc      decimal(15, 2) comment '凭证货币计量的净价值',
    itax       decimal(13, 2) comment '凭证货币税额',
    werks      string comment '工廠',
    vkgrp      string comment '銷售組',
    vkbur      string comment '銷售部門',
    salesgr    string comment '對應的Sales',
    salesoff   string comment '對應的Sales的直屬上級',
    zzshiptype string comment 'Ship Type',
    aupos      string comment '销售和分销凭证的项目号',
    kdkg1      string comment 'Reservation Indicator',
    auart      string comment 'SO訂單類型:ZIS :銷售合同出貨',
    pstyv      string comment 'TANN為FREE標記',
    zwintme01  string comment '送货时间1',
    zwintme02  string comment '送货时间2',
    zwintme03  string comment '送货时间3',
    zwintme04  string comment '送货时间4',
    vsbed      string comment 'Shipping Conditions',
    city1      string comment 'Ship-to party City',
    invamtitax decimal(15, 2) comment 'Amount with tax(含税的订单总价钱)'
)
    comment '接口時按發票創建日期(ERDAT)查詢數據'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/db_core/ods/ods_tb_readonly_invoice_data_inc_month/';

1.2插入數據

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode= 2000;
insert overwrite table ods_tb_readonly_invoice_data_inc_month partition (dt)
select vbeln,
       sonum,
       xblnr,
       dnnum,
       posnr,
       knumv,
       fkart,
       fkdat,
       erdat,
       kunrg,
       kunag,
       dname,
       truck,
       dstop,
       zterm,
       invamt,
       invcost,
       matnr,
       arktx,
       matkl,
       ekgrp,
       prodh,
       fkimg,
       vrkme,
       umvkz,
       umvkn,
       meins,
       ntgew,
       brgew,
       gewei,
       iamt,
       imap,
       icost,
       idisc,
       itax,
       werks,
       vkgrp,
       vkbur,
       salesgr,
       salesoff,
       zzshiptype,
       aupos,
       kdkg1,
       auart,
       pstyv,
       zwintme01,
       zwintme02,
       zwintme03,
       zwintme04,
       vsbed,
       city1,
       invamtitax,
       date_format(fkdat,'yyyy-MM') dt
from ods_tb_readonly_invoice_data_full

3 按照三個月的範圍,按照日期每日導入數據
3.1建表

drop table if exists ods_tb_readonly_invoice_data_day;
create EXTERNAL table ods_tb_readonly_invoice_data_day
(
    vbeln      string comment '發票號碼',
    sonum      string comment 'so單號',
    xblnr      string comment '参考凭证编号',
    dnnum      string comment 'DN單號',
    posnr      string comment '销售和分销凭证的项目号',
    knumv      string comment '单据条件数',
    fkart      string comment 'F2:正常出货;RE: 退货;L2: Debit Memo (額外多收客人钱);G2 : Credit Memo (退钱给客人);S1: 取消出货S2: 取消Credit Memo',
    erdat      string comment '創建日期',
    kunrg      string comment '付款方',
    kunag      string comment 'Sold-to party送达方',
    dname      string comment '司机名',
    truck      string comment '货车',
    dstop      string comment '站数',
    zterm      string comment '支付条款',
    invamt     decimal(15, 2) comment '凭证货币计量的净价值',
    invcost    decimal(15, 2) comment '凭证货币计量的净价值',
    matnr      string comment '物料號',
    arktx      string comment '物料描述',
    matkl      string comment 'Material Group',
    ekgrp      string comment '採購組',
    prodh      string comment 'Product hierarchy',
    fkimg      decimal(13, 3) comment '出貨數量',
    vrkme      string comment '销售单位(NIO备注:对于系统来说这是销售单位,但对于sales来说基本单位才是卖给客人的销售单位)',
    umvkz      bigint comment '销售数量转换成SKU的分子(因子)',
    umvkn      bigint comment '销售数量转换为 SKU 的值(除数)',
    meins      string comment '基本单位',
    ntgew      decimal(13, 3) comment '磅數',
    brgew      decimal(13, 3) comment '毛重',
    gewei      string comment '淨重',
    iamt       decimal(15, 2) comment '客戶售價',
    imap       decimal(15, 2) comment '成本價',
    icost      decimal(15, 2) comment '給SALES的價格',
    idisc      decimal(15, 2) comment '凭证货币计量的净价值',
    itax       decimal(13, 2) comment '凭证货币税额',
    werks      string comment '工廠',
    vkgrp      string comment '銷售組',
    vkbur      string comment '銷售部門',
    salesgr    string comment '對應的Sales',
    salesoff   string comment '對應的Sales的直屬上級',
    zzshiptype string comment 'Ship Type',
    aupos      string comment '销售和分销凭证的项目号',
    kdkg1      string comment 'Reservation Indicator',
    auart      string comment 'SO訂單類型:ZIS :銷售合同出貨',
    pstyv      string comment 'TANN為FREE標記',
    zwintme01  string comment '送货时间1',
    zwintme02  string comment '送货时间2',
    zwintme03  string comment '送货时间3',
    zwintme04  string comment '送货时间4',
    vsbed      string comment 'Shipping Conditions',
    city1      string comment 'Ship-to party City',
    invamtitax decimal(15, 2) comment 'Amount with tax(含税的订单总价钱)'
)
    comment '接口時按發票創建日期(ERDAT)查詢數據'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/db_core/ods/ods_tb_readonly_invoice_data_day/';

3.2 數據加載


use db_core;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode=2000;
insert overwrite table ods_tb_readonly_invoice_data_day partition(dt)
select
   vbeln,
   sonum,
   xblnr,
   dnnum,
   posnr,
   knumv,
   fkart,
   erdat,
   kunrg,
   kunag,
   dname,
   truck,
   dstop,
   zterm,
   invamt,
   invcost,
   matnr,
   arktx,
   matkl,
   ekgrp,
   prodh,
   fkimg,
   vrkme,
   umvkz,
   umvkn,
   meins,
   ntgew,
   brgew,
   gewei,
   iamt,
   imap,
   icost,
   idisc,
   itax,
   werks,
   vkgrp,
   vkbur,
   salesgr,
   salesoff,
   zzshiptype,
   aupos,
   kdkg1,
   auart,
   pstyv,
   zwintme01,
   zwintme02,
   zwintme03,
   zwintme04,
   vsbed,
   city1,
   invamtitax,
   FKDAT
from ods_tb_readonly_invoice_data_inc_month
where FKDAT>='2015-11-30' and FKDAT<='2016-01-01';

3.3 每個三個月輸出的腳本

#!/bin/bash

start_date="2015-11-30"
end_date="2016-12-31"

start_month=`date -d "$start_date" +%Y-%m`
end_month=`date -d "$end_date" +%Y-%m`

diff_months=$(( ($(date -d $end_date +%s) - $(date -d $start_date +%s)) / (30*24*3600) ))

if [ $diff_months -gt 3 ]; 
then
    while [ "$start_month" != "$end_month" ]
    do
        start_day=`date -d "$start_date" +%Y-%m-%d`

        end_day=`date -d "$(date -d "$start_month-01 +3 months -1 day")" +%Y-%m-%d`

        if [ `date -d "$end_day" +%Y-%m` \> "$end_month" ]; then
            end_day=`date -d "$end_month-01 +1 month -1 day" +%Y-%m-%d`
        fi

        start_date=`date -d "$(date -d "$start_month-01 +1 month")" +%Y-%m-%d`
        if [ `date -d "$start_date" +%Y-%m` \> "$end_month" ]; then
            start_month="$end_month"
        else
            start_month=`date -d "$start_date" +%Y-%m`
        fi

        echo "start_day:$start_day,end_day:$end_day"
        
    done
else
    start_day=`date -d "$start_date" +%Y-%m-%d`
    end_day=`date -d "$end_date" +%Y-%m-%d`
    echo "start_day:$start_day,end_day:$end_day"
fi

效果:
在这里插入图片描述

3.3自動化腳本

#!/bin/bash
start_date=$1
end_date=$2

start_month=`date -d "$start_date" +%Y-%m`
end_month=`date -d "$end_date" +%Y-%m`

diff_months=$(( ($(date -d $end_date +%s) - $(date -d $start_date +%s)) / (30*24*3600) ))

if [ $diff_months -gt 3 ]; 
then
    while [ "$start_month" != "$end_month" ]
    do
        start_day=`date -d "$start_date" +%Y-%m-%d`

        end_day=`date -d "$(date -d "$start_month-01 +3 months -1 day")" +%Y-%m-%d`

        if [ `date -d "$end_day" +%Y-%m` \> "$end_month" ]; then
            end_day=`date -d "$end_month-01 +1 month -1 day" +%Y-%m-%d`
        fi

        start_date=`date -d "$(date -d "$start_month-01 +1 month")" +%Y-%m-%d`
        if [ `date -d "$start_date" +%Y-%m` \> "$end_month" ]; then
            start_month="$end_month"
        else
            start_month=`date -d "$start_date" +%Y-%m`
        fi
        echo "start_day:$start_day,end_day:$end_day"
         
        /opt/module/hive/bin/hive -e  "use db_core;set hive.exec.dynamic.partition.mode=nonstrict;set hive.exec.max.dynamic.partitions=10000;set hive.exec.max.dynamic.partitions.pernode=2000;
insert overwrite table db_core.ods_tb_readonly_invoice_data_day partition(dt)
select
   vbeln,
   sonum,
   xblnr,
   dnnum,
   posnr,
   knumv,
   fkart,
   erdat,
   kunrg,
   kunag,
   dname,
   truck,
   dstop,
   zterm,
   invamt,
   invcost,
   matnr,
   arktx,
   matkl,
   ekgrp,
   prodh,
   fkimg,
   vrkme,
   umvkz,
   umvkn,
   meins,
   ntgew,
   brgew,
   gewei,
   iamt,
   imap,
   icost,
   idisc,
   itax,
   werks,
   vkgrp,
   vkbur,
   salesgr,
   salesoff,
   zzshiptype,
   aupos,
   kdkg1,
   auart,
   pstyv,
   zwintme01,
   zwintme02,
   zwintme03,
   zwintme04,
   vsbed,
   city1,
   invamtitax,
   fkdat
from db_core.ods_tb_readonly_invoice_data_inc_month
where fkdat>='$start_day' and fkdat<='$end_day';"
        
    done
else
    start_day=`date -d "$start_date" +%Y-%m-%d`
    end_day=`date -d "$end_date" +%Y-%m-%d`
    echo "start_day:$start_day,end_day:$end_day"
    /opt/module/hive/bin/hive -e "use db_core;set hive.exec.dynamic.partition.mode=nonstrict;set hive.exec.max.dynamic.partitions=10000;set hive.exec.max.dynamic.partitions.pernode=2000;
insert overwrite table db_core.ods_tb_readonly_invoice_data_day partition(dt)
select
   vbeln,
   sonum,
   xblnr,
   dnnum,
   posnr,
   knumv,
   fkart,
   erdat,
   kunrg,
   kunag,
   dname,
   truck,
   dstop,
   zterm,
   invamt,
   invcost,
   matnr,
   arktx,
   matkl,
   ekgrp,
   prodh,
   fkimg,
   vrkme,
   umvkz,
   umvkn,
   meins,
   ntgew,
   brgew,
   gewei,
   iamt,
   imap,
   icost,
   idisc,
   itax,
   werks,
   vkgrp,
   vkbur,
   salesgr,
   salesoff,
   zzshiptype,
   aupos,
   kdkg1,
   auart,
   pstyv,
   zwintme01,
   zwintme02,
   zwintme03,
   zwintme04,
   vsbed,
   city1,
   invamtitax,
   fkdat
from db_core.ods_tb_readonly_invoice_data_inc_month
where fkdat>='$start_day' and fkdat<='$end_day';"
fi

效果:
在这里插入图片描述
在这里插入图片描述

總結:
對於很大的表數據,可以一開始先按月進行分區,按月分區之後,再在按月分區的基礎上,按照天進行分區,可以每次同步三個月的數據,比每次同步整個時間週期的效果要好很多

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ysksolution

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值