(1)Add the following to sqoop-env.sh
export HADOOP_COMMON_HOME=/opt/module/hadoop-3.1.3
export HADOOP_MAPRED_HOME=/opt/module/hadoop-3.1.3
export HIVE_HOME=/opt/module/hive
export ZOOKEEPER_HOME=/opt/module/zookeeper-3.5.7
export ZOOCFGDIR=/opt/module/zookeeper-3.5.7/conf
– Use Sqoop to list the databases in MySQL
bin/sqoop list-databases --connect jdbc:mysql://hadoop102:3306 --username root --password 123456
(2)Common parameters for importing data from MySQL into HDFS with Sqoop
bin/sqoop import \
------------------------- Common parameters ------------------------------
--connect                 specify the JDBC URL
--password                specify the database password
--username                specify the database username
----------------------- HDFS import parameters ----------------------------
--append                  append the data to an existing HDFS directory
--as-textfile             write the data as text files
--as-parquetfile          write the data as Parquet files
--delete-target-dir       delete the target directory before writing to HDFS
--target-dir              specify the HDFS target directory
--fetch-size              how many rows to fetch from MySQL per batch
--num-mappers [-m]        specify the number of mappers
--query [-e]              import the rows returned by a SQL statement: select .. from .. where ..
--table                   specify which table to import from
--where                   specify the condition the imported rows must satisfy
--columns                 specify which columns of the MySQL table to import
--split-by                specify the column used to split the work among mappers
--compress                compress the data when importing into HDFS
--compression-codec       specify the compression codec
--null-string             what to write for NULLs in string columns, so Hive reads them back as NULL
--null-non-string         what to write for NULLs in non-string columns, so Hive reads them back as NULL
--check-column            specify the column checked for incremental imports
--incremental             append [import only new rows] / lastmodified [import new and modified rows]
--last-value              the last value of the check column from the previous import (see the sketch after this list)
--fields-terminated-by    field delimiter to use when the data is written to HDFS
--lines-terminated-by     line delimiter to use when the data is written to HDFS
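The three incremental options work together. A minimal sketch of an incremental append import (the target directory and the last value of 100 are just for illustration):
bin/sqoop import \
--connect jdbc:mysql://hadoop102:3306/gmall \
--username root \
--password 123456 \
--table user_info \
--target-dir hdfs://hadoop102:9820/sqoop_incr \
--num-mappers 1 \
--fields-terminated-by '\t' \
--check-column id \
--incremental append \
--last-value 100
Only rows with id > 100 are imported, and on completion Sqoop logs the new value to pass as --last-value next time. Note that --incremental append cannot be combined with --delete-target-dir.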
Example 1: import from MySQL into HDFS
bin/sqoop import --connect jdbc:mysql://hadoop102:3306/gmall \
--password 123456 \
--username root \
--as-textfile \
--delete-target-dir \
--fetch-size 100 \
--num-mappers 2 \
--query "select * from user_info where id>1 and id<=100 and \$CONDITIONS" \
--split-by id \
--null-string '\\N' \
--null-non-string '\\N' \
--fields-terminated-by '\t' \
--target-dir hdfs://hadoop102:9820/sqoop7
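The literal $CONDITIONS token is required whenever --query is used: Sqoop substitutes a split condition for each mapper in its place at run time, which is how the result set is divided between the two mappers above. The backslash keeps the shell from expanding $CONDITIONS before Sqoop sees it.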
--------------- Hive import parameters ------------------
--hive-import            import the data directly into a Hive table
--hive-overwrite         overwrite the existing data in the Hive table
--create-hive-table      create the Hive table if it does not already exist
--hive-table             specify the target Hive table
--hive-partition-key     specify the partition key of the Hive table
--hive-partition-value   specify the Hive partition value
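Putting the Hive options together, a minimal sketch (the Hive table name gmall.user_info is just for illustration; Sqoop creates the table automatically if it does not exist):
bin/sqoop import \
--connect jdbc:mysql://hadoop102:3306/gmall \
--username root \
--password 123456 \
--table user_info \
--num-mappers 1 \
--fields-terminated-by '\t' \
--hive-import \
--hive-overwrite \
--hive-table gmall.user_info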
(3)Common parameters for exporting data from HDFS into MySQL with Sqoop
bin/sqoop export \
----------------- Common parameters ----------------------
--connect                  specify the JDBC URL
--password                 specify the database password
--username                 specify the database username
----------------- Export parameters ----------------------
--columns                  specify which MySQL columns to export into
--export-dir               the HDFS path to export data from
--num-mappers              specify the number of mappers
--table                    specify which MySQL table to export into
--update-key               column used to decide whether a row already exists in MySQL
--update-mode              updateonly [only update existing rows] / allowinsert [update if the row exists, insert otherwise]
--input-null-string        the string in the input that represents NULL for string columns
--input-null-non-string    the string in the input that represents NULL for non-string columns
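The two update options turn a plain export into an upsert. A sketch, assuming user_info_n has a primary (or unique) key id:
bin/sqoop export \
--connect jdbc:mysql://hadoop102:3306/gmall \
--username root \
--password 123456 \
--table user_info_n \
--export-dir hdfs://hadoop102:9820/sqoop7 \
--num-mappers 1 \
--update-key id \
--update-mode allowinsert \
--input-fields-terminated-by '\t' \
--input-null-string '\\N' \
--input-null-non-string '\\N'
With allowinsert on MySQL, Sqoop issues INSERT ... ON DUPLICATE KEY UPDATE statements, so --update-key must be covered by a primary or unique key for the update part to take effect.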
Example 2: export from HDFS to MySQL
bin/sqoop export --connect "jdbc:mysql://hadoop102:3306/gmall?useUnicode=true&characterEncoding=utf-8" \
--password 123456 \
--username root \
--export-dir hdfs://hadoop102:9820/sqoop7 \
--num-mappers 2 \
--table user_info_n \
--fields-terminated-by '\t' \
--input-null-string '\\N' \
--input-null-non-string '\\N'
Example 3: export selected fields from HDFS to MySQL
bin/sqoop export --connect "jdbc:mysql://hadoop102:3306/gmall?useUnicode=true&characterEncoding=utf-8" \
--password 123456 \
--username root \
--export-dir hdfs://hadoop102:9820/sqoop7 \
--num-mappers 2 \
--table user_info_s \
--columns 'id,login_name' \
--fields-terminated-by '\t' \
--input-null-string '\\N' \
--input-null-non-string '\\N'
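With --columns, the fields of each HDFS record are mapped to the listed columns in order, so every input line here must contain exactly the two fields id and login_name; columns of user_info_s that are not listed must allow NULL or have a default value.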
(4)Ingestion script
#!/bin/bash
sqoop=/opt/module/sqoop/bin/sqoop
# $1 = table name (or first/all), $2 = business date; defaults to yesterday
if [ -n "$2" ] ;then
do_date=$2
else
do_date=`date -d '-1 day' +%F`
fi
# import_data <table> <query>: import the query result into
# /origin_data/gmall/db/<table>/<do_date> as lzop-compressed text
import_data(){
$sqoop import \
--connect jdbc:mysql://hadoop102:3306/gmall \
--username root \
--password 000000 \
--target-dir /origin_data/gmall/db/$1/$do_date \
--delete-target-dir \
--query "$2 and \$CONDITIONS" \
--num-mappers 1 \
--fields-terminated-by '\t' \
--compress \
--compression-codec lzop \
--null-string '\\N' \
--null-non-string '\\N'
# build an LZO index so the compressed output is splittable by MapReduce
hadoop jar /opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.DistributedLzoIndexer /origin_data/gmall/db/$1/$do_date
}
import_order_info(){
import_data order_info "select
id,
final_total_amount,
order_status,
user_id,
out_trade_no,
create_time,
operate_time,
province_id,
benefit_reduce_amount,
original_total_amount,
feight_fee
from order_info
where (date_format(create_time,'%Y-%m-%d')='$do_date'
or date_format(operate_time,'%Y-%m-%d')='$do_date')"
}
import_coupon_use(){
import_data coupon_use "select
id,
coupon_id,
user_id,
order_id,
coupon_status,
get_time,
using_time,
used_time
from coupon_use
where (date_format(get_time,'%Y-%m-%d')='$do_date'
or date_format(using_time,'%Y-%m-%d')='$do_date'
or date_format(used_time,'%Y-%m-%d')='$do_date')"
}
import_order_status_log(){
import_data order_status_log "select
id,
order_id,
order_status,
operate_time
from order_status_log
where date_format(operate_time,'%Y-%m-%d')='$do_date'"
}
import_activity_order(){
import_data activity_order "select
id,
activity_id,
order_id,
create_time
from activity_order
where date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_user_info(){
import_data "user_info" "select
id,
name,
birthday,
gender,
email,
user_level,
create_time,
operate_time
from user_info
where (DATE_FORMAT(create_time,'%Y-%m-%d')='$do_date'
or DATE_FORMAT(operate_time,'%Y-%m-%d')='$do_date')"
}
import_order_detail(){
import_data order_detail "select
od.id,
order_id,
user_id,
sku_id,
sku_name,
order_price,
sku_num,
od.create_time,
source_type,
source_id
from order_detail od
join order_info oi
on od.order_id=oi.id
where DATE_FORMAT(od.create_time,'%Y-%m-%d')='$do_date'"
}
import_payment_info(){
import_data "payment_info" "select
id,
out_trade_no,
order_id,
user_id,
alipay_trade_no,
total_amount,
subject,
payment_type,
payment_time
from payment_info
where DATE_FORMAT(payment_time,'%Y-%m-%d')='$do_date'"
}
import_comment_info(){
import_data comment_info "select
id,
user_id,
sku_id,
spu_id,
order_id,
appraise,
comment_txt,
create_time
from comment_info
where date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_order_refund_info(){
import_data order_refund_info "select
id,
user_id,
order_id,
sku_id,
refund_type,
refund_num,
refund_amount,
refund_reason_type,
create_time
from order_refund_info
where date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_sku_info(){
import_data sku_info "select
id,
spu_id,
price,
sku_name,
sku_desc,
weight,
tm_id,
category3_id,
create_time
from sku_info where 1=1"
}
import_base_category1(){
import_data "base_category1" "select
id,
name
from base_category1 where 1=1"
}
import_base_category2(){
import_data "base_category2" "select
id,
name,
category1_id
from base_category2 where 1=1"
}
import_base_category3(){
import_data "base_category3" "select
id,
name,
category2_id
from base_category3 where 1=1"
}
import_base_province(){
import_data base_province "select
id,
name,
region_id,
area_code,
iso_code
from base_province
where 1=1"
}
import_base_region(){
import_data base_region "select
id,
region_name
from base_region
where 1=1"
}
import_base_trademark(){
import_data base_trademark "select
tm_id,
tm_name
from base_trademark
where 1=1"
}
import_spu_info(){
import_data spu_info "select
id,
spu_name,
category3_id,
tm_id
from spu_info
where 1=1"
}
import_favor_info(){
import_data favor_info "select
id,
user_id,
sku_id,
spu_id,
is_cancel,
create_time,
cancel_time
from favor_info
where 1=1"
}
import_cart_info(){
import_data cart_info "select
id,
user_id,
sku_id,
cart_price,
sku_num,
sku_name,
create_time,
operate_time,
is_ordered,
order_time,
source_type,
source_id
from cart_info
where 1=1"
}
import_coupon_info(){
import_data coupon_info "select
id,
coupon_name,
coupon_type,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
create_time,
range_type,
spu_id,
tm_id,
category3_id,
limit_num,
operate_time,
expire_time
from coupon_info
where 1=1"
}
import_activity_info(){
import_data activity_info "select
id,
activity_name,
activity_type,
start_time,
end_time,
create_time
from activity_info
where 1=1"
}
import_activity_rule(){
import_data activity_rule "select
id,
activity_id,
condition_amount,
condition_num,
benefit_amount,
benefit_discount,
benefit_level
from activity_rule
where 1=1"
}
import_base_dic(){
import_data base_dic "select
dic_code,
dic_name,
parent_code,
create_time,
operate_time
from base_dic
where 1=1"
}
case $1 in
"order_info")
import_order_info
;;
"base_category1")
import_base_category1
;;
"base_category2")
import_base_category2
;;
"base_category3")
import_base_category3
;;
"order_detail")
import_order_detail
;;
"sku_info")
import_sku_info
;;
"user_info")
import_user_info
;;
"payment_info")
import_payment_info
;;
"base_province")
import_base_province
;;
"base_region")
import_base_region
;;
"base_trademark")
import_base_trademark
;;
"activity_info")
import_activity_info
;;
"activity_order")
import_activity_order
;;
"cart_info")
import_cart_info
;;
"comment_info")
import_comment_info
;;
"coupon_info")
import_coupon_info
;;
"coupon_use")
import_coupon_use
;;
"favor_info")
import_favor_info
;;
"order_refund_info")
import_order_refund_info
;;
"order_status_log")
import_order_status_log
;;
"spu_info")
import_spu_info
;;
"activity_rule")
import_activity_rule
;;
"base_dic")
import_base_dic
;;
"first")
import_base_category1
import_base_category2
import_base_category3
import_order_info
import_order_detail
import_sku_info
import_user_info
import_payment_info
import_base_province
import_base_region
import_base_trademark
import_activity_info
import_activity_order
import_cart_info
import_comment_info
import_coupon_use
import_coupon_info
import_favor_info
import_order_refund_info
import_order_status_log
import_spu_info
import_activity_rule
import_base_dic
;;
"all")
import_base_category1
import_base_category2
import_base_category3
import_order_info
import_order_detail
import_sku_info
import_user_info
import_payment_info
import_base_trademark
import_activity_info
import_activity_order
import_cart_info
import_comment_info
import_coupon_use
import_coupon_info
import_favor_info
import_order_refund_info
import_order_status_log
import_spu_info
import_activity_rule
import_base_dic
;;
esac
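Assuming the script is saved as mysql_to_hdfs.sh (the file name is just for illustration) and made executable, it takes a table name (or first/all) plus an optional date:
chmod +x mysql_to_hdfs.sh
# initial full load for a given day
./mysql_to_hdfs.sh first 2020-06-14
# daily load; the date defaults to yesterday
./mysql_to_hdfs.sh all
# re-import a single table for a given day
./mysql_to_hdfs.sh order_info 2020-06-14
The first option additionally imports the static base_province and base_region tables, which the daily all option skips.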