Since the company has deprecated Sqoop in favor of DataX for end-to-end data transfer, I looked into DataX and how to use it to move data from a source system to a target system, taking MySQL to HDFS as the example.
The source table and its data already exist in MySQL and need to be imported into HDFS with DataX. Taking the tbl_logs table as the example (370,000+ rows), the task is to create a job configuration file on the Linux host and fill in the settings; a sketch of that step follows, and the numbered list after it outlines what the configuration covers.
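A minimal sketch of preparing for the job, assuming the database, table, and credentials shown in the configuration below; the MySQL host placeholder and the file name mysql2hdfs_tbl_logs.json are only examples.

# Confirm the source table and its row count (host placeholder; credentials as in the job config below)
mysql -h <MySQL host> -P 3306 -uroot -p123456 -e "SELECT COUNT(*) FROM tags_dat.tbl_logs;"

# Create the DataX job file that will hold the configuration below (file name is just an example)
vim mysql2hdfs_tbl_logs.json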
1. Read flow (reader); the overall job skeleton is sketched right after this list
2. Write flow (writer)
3. Same file-name prefix: behavior under append mode
4. Example of a successful run; the launch command is sketched after the complete configuration
5. Complete parameter configuration
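For orientation, this is the overall shape of the DataX job file, distilled from the complete configuration below; the parameter bodies are replaced here by short descriptions.

{
    "job": {
        "content": [
            {
                "reader": { "name": "mysqlreader", "parameter": "…source columns, where clause, JDBC connection, credentials…" },
                "writer": { "name": "hdfswriter", "parameter": "…target columns, defaultFS, path, fileName, fileType, compress, writeMode…" }
            }
        ],
        "setting": { "speed": { "channel": 1 } }
    }
}

Each reader/writer pair forms one sync task; speed.channel sets the degree of parallelism, and with channel set to 1 and splitPk left empty the table is read in a single channel.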
{ "job": { "content": [ { "reader": { "name": "mysqlreader", "parameter": { "column": [ "id", "log_id" , "remote_ip" , "site_global_ticket" , "site_global_session" , "global_user_id" , "cookie_text" , "user_agent" , "ref_url" , "loc_url" , "log_time" ], "where": "id>=3", "connection": [ { "jdbcUrl": [ "jdbc:mysql://Your IP:3306/tags_dat" ], "table": [ "tbl_logs" ] } ], "password": "123456", "splitPk": "", "username": "root" } }, "writer": { "name": "hdfswriter", "parameter": { "column": [ { "name": "id", "type": "bigint" }, { "name": "log_id", "type": "string" }, { "name": "remote_ip", "type": "string" }, { "name": "site_global_ticket", "type": "string" }, { "name": "site_global_session", "type": "string" }, { "name": "global_user_id", "type": "bigint" }, { "name": "cookie_text", "type": "string" }, { "name": "user_agent", "type": "string" }, { "name": "ref_url", "type": "string" }, { "name": "loc_url", "type": "string" }, { "name": "log_time", "type": "string" } ], "compress": "gzip", "defaultFS": "hdfs://Your IP:8020", "fieldDelimiter": "\t", "fileName": "tbl_logs", "fileType": "text", "path": "/user/hive/warehouse/tags_dat.db", "writeMode": "append" } } } ], "setting": { "speed": { "channel": 1 } } } }