【sqoop官方文档】 http://sqoop.apache.org/docs/1.4.6/SqoopUserGuide.html#_incremental_imports
--1、 mysql建表
drop table cust_info;
create table cust_info(
`id` bigint comment '编号',
`telephone` VARCHAR(20) comment '电话',
`tname` VARCHAR(50) comment '姓名',
`create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间',
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '更新时间'
)comment '人员信息' ;
alter table cust_info change tname tname varchar(255) character set utf8;
--备注:mysql 中文字符 设置为 utf8
--INSERT INTO cust_info VALUES (1,'13511001','张三',current_timestamp(),current_timestamp());
--INSERT INTO cust_info VALUES (2,'13611001','李四',current_timestamp(),current_timestamp());
INSERT INTO cust_info VALUES (1,'13511001','张三',STR_TO_DATE('2020-08-07 16:01:45', '%Y-%m-%d %H:%i:%s'),STR_TO_DATE('2020-08-07 16:01:45', '%Y-%m-%d %H:%i:%s'));
INSERT INTO cust_info VALUES (2,'13611001','李四',STR_TO_DATE('2020-08-08 16:01:45', '%Y-%m-%d %H:%i:%s'),STR_TO_DATE('2020-08-08 16:01:45', '%Y-%m-%d %H:%i:%s'));
-- hive导出表
drop table hive_cust_info;
create table hive_cust_info(
`id` bigint comment '编号',
`telephone` VARCHAR(20) comment '电话',
`tname` VARCHAR(50) comment '姓名',
`create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间',
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '更新时间'
)comment '人员信息' ;
alter table hive_cust_info change tname tname varchar(255) character set utf8;
SELECT * FROM cust_info ;
--2、rsc 层建表语句
drop table rsc.cust_info;
create table rsc.cust_info(
`id` bigint comment '编号',
`telephone` string comment '电话',
`tname` string comment '姓名',
`create_time` timestamp comment '创建时间',
`update_time` timestamp comment '更新时间'
)comment '人员信息'
PARTITIONED BY(etl_dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u0001'
STORED AS TEXTFILE;
--备注 rsc 一般为分表以拉链表方式保留历史
--rsc建表格式为TEXTFILE、字段分隔符和mysql端保持一致为【\u0001】
--rdw层建表
create table rdw.t01_cust(
`id` bigint comment '编号',
`telephone` string comment '电话',
`tname` string comment '姓名',
`create_time` timestamp comment '创建时间',
`update_time` timestamp comment '更新时间'
)comment '人员信息'
PARTITIONED BY(etl_dt string)
STORED AS ORC;
-- sqoop 任务1 (mysql导出到hive)
sqoop import --connect jdbc:mysql://192.168.91.112:3306/bgdmysqldb --username root --password '2019_Mysql' --table cust_info --columns "id,telephone,tname,create_time,update_time" --where " 1=1 and (create_time>=concat('2020-08-07',' 00:00:00') and create_time<=concat('2020-08-07',' 23:59:59')) OR (update_time>=concat('2020-08-07',' 00:00:00') and update_time<=concat('2020-08-07',' 23:59:59')) " --fields-terminated-by '\001' --num-mappers 1 --hive-import --hive-database rsc --hive-table cust_info --delete-target-dir --hive-partition-key etl_dt --hive-partition-value 2020-08-04 --hive-drop-import-delims --null-string 'N' --null-non-string '0'
--以下是换行版本(功能和以上相同,“--” 之间用 空格分开)
sqoop import \
--connect jdbc:mysql://192.168.91.112:3306/bgdmysqldb\
--username root\
--password '2019_Mysql'\
--table cust_info\
--columns "id,telephone,tname,create_time,update_time"\
--where " 1=1 and (create_time>=concat('2020-08-07',' 00:00:00') and create_time<=concat('2020-08-07',' 23:59:59')) OR (update_time>=concat('2020-08-07',' 00:00:00') and update_time<=concat('2020-08-07',' 23:59:59')) "\
--fields-terminated-by '\001'\
--num-mappers 1\
--hive-import\
--hive-database rsc\
--hive-table cust_info\
--delete-target-dir\
--hive-partition-key etl_dt\
--hive-partition-value 2020-08-04\
--hive-drop-import-delims\
--null-string 'N'\
--null-non-string '0'
----------------------------------------------------------------------------------------
-- 备注:sqoop 导入后字段分隔符--fields-terminated-by '\001' 与hive的默认字段分隔符 '\001' 保持一致。
--sqoop任务2(从hive导出到mysql)
sqoop export --connect 'jdbc:mysql://192.168.91.112:3306/bgdmysqldb?useUnicode=true&characterEncoding=utf-8' --username root --password '2019_Mysql' --table hive_cust_info --export-dir /user/hive/warehouse/rsc.db/cust_info/etl_dt=2020-08-04 --input-fields-terminated-by '\001'
-- 备注1 “?useUnicode=true&characterEncoding=utf-8” 防止导出乱码
--备注2 input-fields-terminated-by '\001' 使用正确分隔符。
-- 参考:
sqoop import --connect jdbc:mysql://10.10.10.10:7185/jssclub?tinyInt1isBit=false
--username BI
--password xxxxxxxxx
--table video
--columns "id,title,video_img,video_link,lecturer_name,avatar,lecturer_introduce,description,video_type,sort,video_status,create_time,update_time,one_type,video_duration,video_img_clear,entry_video_type,train_type"
--where "1=1" -m 1
--class-name rsc_jssclub_video
--delete-target-dir
--target-dir /tmp/rsc/jssclub_video_2020-08-04
--hive-import
--hive-drop-import-delims
--hive-database rsc
--hive-table jssclub_video
--hive-partition-key etl_dt
--hive-partition-value 2020-08-04
--fields-terminated-by '\001'
--null-string '\\N'
--null-non-string '0'
--input-null-string \\N
--input-null-non-string "\\N"
参考(二)hive中的分隔符:
https://blog.csdn.net/qq_26442553/article/details/80297028?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-3.channel_param&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-3.channel_param