数仓库常用脚本
xsync集群同步分发脚本
#!/bin/bash
#1 获取输入参数个数,如果没有参数,直接退出
pcount=$#
if((pcount==0)); then
echo no args;
exit;
fi
#2 获取文件名称
p1=$1
fname=`basename $p1`
echo fname=$fname
#3 获取上级目录到绝对路径
pdir=`cd -P $(dirname $p1); pwd`
echo pdir=$pdir
#4 获取当前用户名称
user=`whoami`
#5 循环
for((host=1; host<=5; host++)); do
echo ------------------- cluster$host --------------
rsync -av $pdir/$fname $user@hadoop$host:$pdir
done
数据同步脚本,从hdfs到hive
1.首先在hive中创建表,并指定存储格式、压缩格式、存储位置
DROP TABLE IF EXISTS TABLE_NAME;
CREATE TABLE TABLE_NAME(
)
PARTITIONED BY ()
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\T'
LINES TERMINATED BY '\N'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
STORED AS
INPUTFORMAT = ""
OUTPUTFORMAT = ""
LOCATION '/X/X/X/X/X';
......
2.导入数据
#!/bin/bash
APP=beilei
hive=/x/x/x/hive
if [ -n "$1" ]; then
do_date = "$1"
else
do_date = `date -d "-1 day" + %F`
fi
sql="
load data inpath '/x/x/x/x/' overwrite into table "$APP".table_name partition(dt = "$do_date");
load data inpath '/x/x/x/x/' overwrite into table "$APP".table_name partition(dt = "$do_date");
.......
"
3.查看数据是否导入成功
-- 通过HUE来查看表是否创建成功
select * from table limit 1 ;
sqoop导数据脚本
#!/bin/bash
db_date=$2 #第二个参数
echo $db_date #操作日期
db_name=gmall #数据库名称
# 定义从数据库到hdfs的函数
import_data() {
sqoop import \
--connect jdbc:mysql://hadoop102:3306/$db_name \
--username root \
--password 000000 \
--target-dir /origin_data/$db_name/db/$1/$db_date \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--query "$2"' and $CONDITIONS;'
}
import_sku_info(){
import_data "sku_info" "select
id, spu_id, price, sku_name, sku_desc, weight, tm_id,
category3_id, create_time
from sku_info where 1=1"
}
import_user_info(){
import_data "user_info" "select
id, name, birthday, gender, email, user_level,
create_time
from user_info where 1=1"
}
import_base_category1(){
import_data "base_category1" "select
id, name from base_category1 where 1=1"
}
import_base_category2(){
import_data "base_category2" "select
id, name, category1_id from base_category2 where 1=1"
}
import_base_category3(){
import_data "base_category3" "select id, name, category2_id from base_category3 where 1=1"
}
import_order_detail(){
import_data "order_detail" "select
od.id,
order_id,
user_id,
sku_id,
sku_name,
order_price,
sku_num,
o.create_time
from order_info o , order_detail od
where o.id=od.order_id
and DATE_FORMAT(create_time,'%Y-%m-%d')='$db_date'"
}
import_payment_info(){
import_data "payment_info" "select
id,
out_trade_no,
order_id,
user_id,
alipay_trade_no,
total_amount,
subject,
payment_type,
payment_time
from payment_info
where DATE_FORMAT(payment_time,'%Y-%m-%d')='$db_date'"
}
import_order_info(){
import_data "order_info" "select
id,
total_amount,
order_status,
user_id,
payment_way,
out_trade_no,
create_time,
operate_time
from order_info
where (DATE_FORMAT(create_time,'%Y-%m-%d')='$db_date' or DATE_FORMAT(operate_time,'%Y-%m-%d')='$db_date')"
}
case $1 in
"base_category1")
import_base_category1
;;
"base_category2")
import_base_category2
;;
"base_category3")
import_base_category3
;;
"order_info")
import_order_info
;;
"order_detail")
import_order_detail
;;
"sku_info")
import_sku_info
;;
"user_info")
import_user_info
;;
"payment_info")
import_payment_info
;;
"all")
import_base_category1
import_base_category2
import_base_category3
import_order_info
import_order_detail
import_sku_info
import_user_info
import_payment_info
;;
esac