shell 执行代码 进入hive_Shell脚本执行hive语句 | hive以日期建立分区表 | linux schedule程序...

本文介绍了一个Shell脚本,用于执行Hive数据操作,包括创建外部表、插入数据到分区表中,并结合日期进行数据处理。此外,脚本还涉及到Hadoop的jar包执行和数据导出到MySQL。
摘要由CSDN通过智能技术生成

#!/bin/bash

source /etc/profile;

##################################################

# Author: ouyangyewei #

# #

# Content: Combineorder Algorithm #

##################################################

# change workspace to here

cd /

cd /home/deploy/recsys/algorithm/schedule/project/combineorder

# generate product_sell data

yesterday=$(date -d '-1 day' '+%Y-%m-%d')

lastweek=$(date -d '-1 week' '+%Y-%m-%d')

/usr/local/cloud/hive/bin/hive<

CREATE EXTERNAL TABLE IF NOT EXISTS product_sell(

category_id bigint,

province_id bigint,

product_id bigint,

price double,

sell_num bigint

)

PARTITIONED BY (ds string)

ROW FORMAT DELIMITED

FIELDS TERMINATED BY '\t'

LINES TERMINATED BY '\n'

STORED AS TEXTFILE;

INSERT OVERWRITE TABLE product_sell PARTITION (ds='$yesterday') select a.category_id, b.good_receiver_province_id as province_id, a.id as product_id, (b.sell_amount/b.sell_num) as price, b.sell_num from product a join (select si.product_id, s.good_receiver_province_id, sum(si.order_item_amount) sell_amount, sum(si.order_item_num) sell_num from so_item si join so s on (si.order_id=s.id) where si.is_gift=0 and si.is_hidden=0 and si.ds between '$lastweek' and '$yesterday' group by s.good_receiver_province_id, si.product_id) b on (a.id=b.product_id);

EOF

# generate yhd_gmv_month data

yesterday=$(date -d '-1 day' '+%Y-%m-%d')

lastmonth=$(date -d '-1 month' '+%Y-%m-%d')

/usr/local/cloud/hive/bin/hive<

CREATE EXTERNAL TABLE IF NOT EXISTS yhd_gmv_month(

province_id bigint,

price_area int,

product_id bigint,

sell_num bigint

)

PARTITIONED BY (ds string)

ROW FORMAT DELIMITED

FIELDS TERMINATED BY '\t'

LINES TERMINATED BY '\n'

STORED AS TEXTFILE;

INSERT OVERWRITE TABLE yhd_gmv_month PARTITION (ds='$yesterday') select ssi.province_id, (case when price>0.0 and price<=10.0 then 0 when price>10.0 and price<=20.0 then 1 when price>20.0 and price<=30.0 then 2 when price>30.0 then 3 else -1 end) as price_area, ssi.product_id, ssi.sell_num from (select s.good_receiver_province_id as province_id, si.product_id, sum(si.order_item_num) as sell_num, sum(si.order_item_amount)/sum(si.order_item_num) as price from so_item si join so s on (si.order_id=s.id) where si.is_hidden=0 and si.is_gift=0 and si.ds between '$lastmonth' and '$yesterday' group by s.good_receiver_province_id, si.product_id) ssi;

EOF

# execute the combineorder algorithm job

cd /

cd /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/pms_category_rec_prod

hadoop jar /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/recommender-dm-1.0-SNAPSHOT.jar com.yhd.recommender.combineorder.schedule.CombineorderRecommendScheduler

# export "pms_category_rec_prod" data to mysql

cd /

cd /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/pms_category_rec_prod

hadoop jar /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/recommender-dm-1.0-SNAPSHOT.jar com.yhd.recommender.exporter.db.HdfsToDBProcessor

# check "yhd_gmv_month" is exist

yesterday=$(date -d '-1 day' '+%Y-%m-%d')

hadoop fs -test -e /user/hive/warehouse/yhd_gmv_month/ds=2014-08-27

if [ $? -ne 0 ] ;then

echo 'Error! Directory is not exist'

else

# auto modify date time

oldestVersionDay=$(date -d '-3 day' '+%Y-%m-%d')

olderVersionDay=$(date -d '-2 day' '+%Y-%m-%d')

newVersionDay=$(date -d '-1 day' '+%Y-%m-%d')

sed -r -i '{s/oldestVersion=\/user\/hive\/warehouse\/yhd_gmv_month\/ds=.*/oldestVersion=\/user\/hive\/warehouse\/yhd_gmv_month\/ds='"${oldestVersionDay}"'/}' /home/deploy/recsys/algorithm/schedule/verifaction/combineorder/yhd_gmv_month/input/verification.properties

sed -r -i '{s/olderVersion=\/user\/hive\/warehouse\/yhd_gmv_month\/ds=.*/olderVersion=\/user\/hive\/warehouse\/yhd_gmv_month\/ds='"${olderVersionDay}"'/}' /home/deploy/recsys/algorithm/schedule/verifaction/combineorder/yhd_gmv_month/input/verification.properties

sed -r -i '{s/newVersion=\/user\/hive\/warehouse\/yhd_gmv_month\/ds=.*/newVersion=\/user\/hive\/warehouse\/yhd_gmv_month\/ds='"${newVersionDay}"'/}' /home/deploy/recsys/algorithm/schedule/verifaction/combineorder/yhd_gmv_month/input/verification.properties

# export "yhd_gmv_month" data to mysql

cd /

cd /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/yhd_gmv_month

hadoop jar /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/recommender-dm-1.0-SNAPSHOT.jar com.yhd.recommender.exporter.db.HdfsToDBProcessor

fi

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值