1. 创建如下test表
(4 rows in set)
+----+------+---------------------+
| id | val | created_at |
+----+------+---------------------+
| 0 | 0 | 2012-05-30 15:27:48 |
| 1 | 4 | 2012-05-30 15:28:33 |
| 2 | 14 | 2012-05-30 15:28:33 |
| 3 | 100 | 2012-05-30 15:41:17 |
+----+------+---------------------+
2. 源表全量导入到HDFS（注意：sqoop job --create <job名> 之后必须跟 "-- import" 指定工具）
sqoop job --create import -- import --connect jdbc:mysql://<host name>/<database> --table test --username hive -P -m 1 --target-dir old
3. 更新表（模拟源表数据发生变更）
update test set val=210 where id=3;
+----+------+---------------------+
| id | val | created_at |
+----+------+---------------------+
| 0 | 0 | 2012-05-30 15:27:48 |
| 1 | 4 | 2012-05-30 15:28:33 |
| 2 | 14 | 2012-05-30 15:28:33 |
| 3 | 210 | 2012-05-31 10:09:28 |
+----+------+---------------------+
4. 查询源表当前最大时间戳，并创建基于 lastmodified 模式的增量导入job
select max(created_at) into @max_dt from test;
sqoop job --create mergetest -- import --connect jdbc:mysql://<host name>/<database> --table test --incremental lastmodified --check-column created_at --last-value "2012-05-30 15:41:17" --username hive -P -m 1 --target-dir new
（说明：--last-value 需要传入上一步查询得到的具体时间戳字面值，不能直接引用 MySQL 变量）
5. 执行job
sqoop job --exec mergetest
6. 合并操作（用增量数据 new 合并覆盖全量数据 old）
cp /tmp/sqoop-${USER}/compile/028e88f8a52c3505710a26375853e7ff/test.jar .
sqoop merge --class-name test --new-data new --onto old --target-dir merge --jar-file ./test.jar --merge-key id
7. 导入Hive
create table test(id int,val int,created_at string) row format delimited fields terminated by ',';
load data inpath 'merge/part-r-00000' into table test;
8. 验证结果（id=3 的记录已更新为合并后的最新值）
hive> select * from test;
OK
0 0 2012-05-30 15:27:48.0
1 4 2012-05-30 15:28:33.0
2 14 2012-05-30 15:28:33.0
3 210 2012-05-31 10:09:28.0