datax支持mysql数据同步到hive
参考 http://www.imooc.com/article/259497
1.首先创建mysql表
-- MySQL source table: application users plus audit columns (operator / operate_time / operate_ip).
CREATE TABLE `user` (
    `id`           int(11)       NOT NULL AUTO_INCREMENT,
    `username`     varchar(50)   NOT NULL,
    `telephone`    varchar(30)   DEFAULT NULL,
    `mail`         varchar(50)   DEFAULT NULL,
    `password`     varchar(32)   DEFAULT NULL,
    `remark`       varchar(1000) DEFAULT NULL,
    `status`       int(11)       NOT NULL,
    `operator`     varchar(50)   NOT NULL,
    `operate_time` datetime      NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `operate_ip`   varchar(20)   NOT NULL,
    PRIMARY KEY (`id`),
    UNIQUE KEY `idx_username` (`username`) USING BTREE,
    UNIQUE KEY `idx_mail` (`mail`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8;
2.插入数据
3.创建hive外部表
-- Hive target table for the DataX sync.
-- Fix 1: step 3 says "外部表" (external table) but the original DDL created a
--        MANAGED table; EXTERNAL matches the intent and keeps the HDFS data
--        if the table definition is dropped.
-- Fix 2: ORC is a binary columnar format, so ROW FORMAT DELIMITED FIELDS
--        TERMINATED BY '\t' has no effect and only misleads (the delimiter
--        warning in step 6 applies to textfile tables, not ORC).
--        STORED AS ORC is the current spelling of the deprecated ORCFILE.
create external table mysql_to_hive (
    id        int,
    username  string,
    telephone string,
    mail      string
)
STORED AS ORC;
4.获取hdfs存储地址
hive>show create table mysql_to_hive;
5.查看hdfs目录下是否有文件
hdfs dfs -ls /user/hive/warehouse/test.db/mysql_to_hive
没有文件
6.配置文件
vim mysql2hive.json
注意:hdfswriter写入时的字段分隔符,需要用户保证与创建的Hive表的字段分隔符一致,否则无法在Hive表中查到数据(该限制针对 text 格式;本例使用 orc 格式,分隔符由 ORC 文件自身管理)
{
  "job": {
    "setting": {
      "speed": {
        "channel": 1
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "user",
            "password": "password",
            "connection": [
              {
                "querySql": [
                  "select id,username,telephone,mail from user"
                ],
                "jdbcUrl": [
                  "jdbc:mysql://localhost:3306/datax"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://hadoop001:8020",
            "fileType": "orc",
            "path": "/user/hive/warehouse/test.db/mysql_to_hive",
            "fileName": "mysql_to_hive",
            "column": [
              { "name": "id",        "type": "INT" },
              { "name": "username",  "type": "STRING" },
              { "name": "telephone", "type": "STRING" },
              { "name": "mail",      "type": "STRING" }
            ],
            "writeMode": "append",
            "fieldDelimiter": "\t",
            "compress": "NONE"
          }
        }
      }
    ]
  }
}
7.执行:
python bin/datax.py /data/dataxJob/mysql2hive.json
8.检查执行结果
查看hdfs目录
hive查询表格