Use case: in real business systems, the data in these tables is modified frequently, so they can only be loaded with a full import on every run; an incremental import cannot be used for them.
Preparation
Create the Sqoop password file and upload it to HDFS
# Write the database password (here: root) with no trailing newline
echo -n root > sqoopPWD.pwd
# Create the HDFS directory and upload the password file
hdfs dfs -mkdir -p /sqoop/pwd
hdfs dfs -put sqoopPWD.pwd /sqoop/pwd
# Remove the local copy
rm -f sqoopPWD.pwd
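Because this file stores the database password in plain text, it is common practice to restrict its HDFS permissions so that only the owner can read it; a minimal sketch (the 400 mode is a hardening recommendation, not part of the original steps):
# Owner read-only; Sqoop reads the file as the submitting user
hdfs dfs -chmod 400 /sqoop/pwd/sqoopPWD.pwd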
Create the sqoop job creation script
#!/bin/bash
# list holds the tables that need a full import
list="cart code_brand code_category code_city code_education code_email_suffix code_goods code_profession user user_addr user_extend"
for tablename in $list; do
echo "Creating job bap_${tablename}"
# Drop any existing job of the same name (prints a harmless error on the first run)
sqoop job --delete bap_${tablename}
# --password-file is the Sqoop password file uploaded to HDFS above;
# --target-dir is the location directory of the corresponding Hive table;
# --delete-target-dir clears it first so each run replaces the previous full load.
sqoop job --create bap_${tablename} \
-- import \
--connect jdbc:mysql://singleNode:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table ${tablename} \
--delete-target-dir \
--target-dir /snbap/ods/ods_${tablename} \
--fields-terminated-by '\001'
done
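After the creation script runs, it is worth confirming that the jobs were actually registered. Both commands below are standard sqoop job options; bap_user is just one of the jobs created above:
# List all saved Sqoop jobs; one bap_<table> entry should appear per table
sqoop job --list
# Show the stored definition of a single job
sqoop job --show bap_user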
Create the job execution script
#!/bin/bash
list="cart code_brand code_category code_city code_education code_email_suffix code_goods code_profession user user_addr user_extend"
for tablename in $list; do
echo "Executing job bap_${tablename}"
sqoop job --exec bap_${tablename}
echo "Job bap_${tablename} finished"
done
# Remove the ORM .java files that Sqoop generates in the working directory
rm -f *.java
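Once the jobs finish, each table's full extract should sit under its own target directory. A quick spot check, assuming Sqoop's default part-m-* output file naming (ods_user is just one example):
# One directory per table under /snbap/ods
hdfs dfs -ls /snbap/ods/
# Peek at a few rows; fields are separated by the non-printing '\001' delimiter
hdfs dfs -cat /snbap/ods/ods_user/part-m-00000 | head -n 5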