Sqoop对mysql进行接入hive的shell脚本
1.表准备
CREATE TABLE sc(
`sid` int ,
`cid` int ,
`gd` int
) COMMENT = 'sc'
;
CREATE TABLE src.src_sc (
`sid` int ,
`cid` int ,
`gd` int
) COMMENT 'sc'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS textfile;
drop table src.src_sc_d;
CREATE TABLE src.src_sc_d (
`sid` int ,
`cid` int ,
`gd` int
) COMMENT 'sc'
PARTITIONED BY (etl_date STRING COMMENT 'etl_日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
lines terminated by '\n'
STORED AS textfile;
2.shell脚本接入
2.1.非分区表接入脚本
sid=$1
sqoop_home=/opt/links/sqoop/bin/sqoop
connect=jdbc:mysql://hlm03:8021/bfd
username=root
password=root
sql="select sid,cid,gd from bfd.sc where sid <= $sid and \$CONDITIONS "
hive_database=src
hive_table=src_sc
target_dir=/user/hive/warehouse/src.db/src_sc
lines_terminated_by="\n"
fields_terminated_by="\001"
num_mappers=1
import_data() {
$sqoop_home import \
--connect $connect \
--username $username \
--password $password \
--query "$sql" \
--hive-database $hive_database \
--hive-table $hive_table \
--null-string '\\N' \
--null-non-string '\\N' \
--num-mappers $num_mappers \
--target-dir $target_dir \
--lines-terminated-by "$lines_terminated_by" \
--fields-terminated-by "$fields_terminated_by" \
--hive-drop-import-delims \
--delete-target-dir
}
import_data
/opt/links/sqoop/bin/sqoop import \
--connect jdbc:mysql://hlm03:8021/bfd \
--username root \
--password root \
--query "select sid,cid,gd from bfd.sc where \$CONDITIONS and sid <= 4" \
--hive-database src \
--hive-table src_sc \
--null-string '\\N' \
--null-non-string '\\N' \
--num-mappers 1 \
--target-dir /user/hive/warehouse/src.db/src_sc \
--lines-terminated-by '\n' \
--fields-terminated-by "\001" \
--delete-target-dir
2.2.分区表接入脚本
sid=$1
etl_date=`date -d "1 day ago" +'%Y-%m-%d'`
sqoop_home=/opt/links/sqoop/bin/sqoop
connect=jdbc:mysql://hlm03:8021/bfd
username=root
password=root
sql="select sid,cid,gd from bfd.sc where sid <= $sid and \$CONDITIONS "
hive_database=src
hive_table=src_sc_d
hive_partition_key=etl_date
hive_partition_value=$etl_date
target_dir=/user/hive/warehouse/src.db/src_sc_d
lines_terminated_by="\n"
fields_terminated_by="\001"
num_mappers=1
import_partition_data() {
$sqoop_home import \
--connect $connect \
--username $username \
--password $password \
--query "$sql" \
--target-dir $target_dir \
--delete-target-dir \
--hive-import \
--hive-overwrite \
--hive-database $hive_database \
--hive-table $hive_table \
--hive-partition-key $hive_partition_key \
--hive-partition-value $hive_partition_value \
--fields-terminated-by $fields_terminated_by \
-m $num_mappers
}
import_partition_data