# Unpack the DataX distribution and give ownership of the extracted
# directory to the hadoop user so later jobs can run without sudo.
sudo tar -zxf datax.tar.gz
sudo chown -R hadoop:hadoop datax
# Verify the installation by running the bundled sample job.
# NOTE(review): assumes the current directory is datax/bin (datax.py
# lives there and ../job/job.json is the stock self-test) — confirm.
python datax.py ../job/job.json
# To read data from HDFS and write it into MySQL, first generate a job
# configuration template with the command below.
# The valid -r (reader) / -w (writer) names can be found under the
# plugin/ directory of the DataX installation.
python datax.py -r hdfsreader -w mysqlwriter
# 配置后的模板如下 (the filled-in template is shown below):
{
"job": {
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"column": ["*"],
"defaultFS": "hdfs://server01:9000",
"encoding": "UTF-8",
"fieldDelimiter": "|",
"fileType": "text",
"path": "/user/hive/warehouse/employee_contract"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["*"],
"connection": [
{
"jdbcUrl": "jdbc:mysql://localhost:3306/test",
"table": ["employee_contract"]
}
],
"password": "1234567",
"preSql": [],
"session": [],
"username": "root",
"writeMode": "replace"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
# 运行数据同步 (run the data-sync job):
# Launch the HDFS-to-MySQL sync using the saved job configuration.
# NOTE(review): presumes the template above was saved as
# ../job/hdfs2mysql.json — verify the file path before running.
python datax.py ../job/hdfs2mysql.json