DataX Script Examples

The JSON configuration of a DataX job:

- content (the main body of the DataX job configuration)
  - reader (reader configuration)
    - name (reader plugin name)
    - parameter (reader-specific parameters)
  - writer (writer configuration)
    - name (writer plugin name)
    - parameter (writer-specific parameters)
- setting (job-level settings such as speed/channel)
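
A minimal, runnable sketch of that structure, using the built-in streamreader/streamwriter plugins (the "hello" value and sliceRecordCount here are placeholders, not part of any example below):

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "streamreader",
                    "parameter": {
                        "column": [ { "type": "string", "value": "hello" } ],
                        "sliceRecordCount": 1
                    }
                },
                "writer": {
                    "name": "streamwriter",
                    "parameter": { "print": true }
                }
            }
        ],
        "setting": {
            "speed": { "channel": 1 }
        }
    }
}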

### Download
http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

Source code: https://github.com/alibaba/DataX
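
Installation is just unpacking the tarball; the /opt/installs target directory below is an assumption chosen to match the paths used later in this article:

# download the release and unpack it under /opt/installs
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
tar -xzvf datax.tar.gz -C /opt/installs/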

Log into an sftp session on the hadoop12 node with the following command: sftp root@hadoop12

Whether a command operates on the local machine or on the remote end is indicated by the l prefix (compare pwd and lpwd).

get a.txt    download a.txt from hadoop12 to the local machine
put a.txt    upload the local a.txt to hadoop12
lpwd         print the local working directory (the l prefix means local)
exit         close the sftp session
sftp         uses port 22

# Passwordless SSH from hadoop10 to hadoop12: run ll ~/.ssh (an existing id_rsa.pub means a key pair was already generated), then ssh-copy-id hadoop12
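
A sketch of the full key setup plus a short sftp session, assuming no key pair exists yet on hadoop10:

# generate a key pair if ~/.ssh/id_rsa.pub is not there yet
ssh-keygen -t rsa
# copy the public key to hadoop12 so ssh/sftp stop prompting for a password
ssh-copy-id hadoop12

# example sftp session
sftp root@hadoop12
# sftp> lpwd        (show the local working directory)
# sftp> put a.txt   (upload a.txt to hadoop12)
# sftp> get b.txt   (download b.txt from hadoop12)
# sftp> exit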

Common DataX commands

# Run the official sample job to verify that DataX works
[root@hadoop10 conf]# python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/job.json

# Template command (frequently used): print the JSON template for a given reader/writer pair
python datax.py -r streamreader -w streamwriter
# Run a finished job JSON
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/test_job.json

# Dump the mysqlreader-to-hdfswriter JSON template into a job file (test_job.json here)
python /opt/installs/datax/bin/datax.py -r mysqlreader -w hdfswriter >  /opt/installs/datax/job/test_job.json
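
datax.py also accepts runtime variables via -p, which replace ${...} placeholders inside the job JSON; the variable name start_day below is made up purely for illustration:

# pass -D variables into the job; ${start_day} in the JSON (e.g. inside a where clause) is substituted at submit time
python /opt/installs/datax/bin/datax.py -p "-Dstart_day=2023-01-01" /opt/installs/datax/job/test_job.json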

Starting datax-web

# First change into the datax-web installation directory:
cd /opt/installs/datax-web-2.1.2/
# Then run the start script under bin:
./bin/start-all.sh
# To stop the services, run the stop script under bin:
./bin/stop-all.sh
# Web UI address (port 9527)
http://hadoop13:9527/index.html
Username: admin
Password: 123456
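
A quick way to confirm the services came up; the exact Java process names depend on the datax-web build, so treat the jps check as a rough sanity test:

# start-all.sh launches an admin and an executor process; jps should show two extra Java processes
jps
# the web UI should answer on port 9527
curl -I http://hadoop13:9527/index.html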

01 Print your own name repeatedly (streamreader to streamwriter)

# streamreader emits sliceRecordCount records per channel (20 here) and streamwriter prints them to the console
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "streamreader",
                    "parameter": {
                        "column": [
						{
							"type":"string",
							"value":"jiejunchong"
						},
						{
							"type":"long",
							"value":23
						}
						],
                        "sliceRecordCount": 20
                    }
                },
                "writer": {
                    "name": "streamwriter",
                    "parameter": {
                        "encoding": "utf-8",
                        "print": true
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
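
To run it, save the JSON under the job directory and submit it with datax.py; the file name stream_demo.json is arbitrary:

# submit the job; each channel prints sliceRecordCount lines such as "jiejunchong  23" to the console
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/stream_demo.json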

02 mysql2mysql

# Copy the MySQL table user_info on hadoop10 into the MySQL table of the same name on hadoop12
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": ["id","uid","nick_name","achievement","level","job","register_time"], 
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://hadoop10:3306/test1"], 
                                "table": ["user_info"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root", 
                  
                    }
                }, 
                "writer": {
                    "name": "mysqlwriter", 
                    "parameter": {
                        "column": ["id","uid","nick_name","achievement","level","job","register_time"], 
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://hadoop12:3306/test_1?characterEncoding=UTF-8",
                                "table": ["user_info"]
                            }
                        ], 
                        "password": "123456", 
                        "preSql": ["truncate table user_info"], 
                        "session": [], 
                        "username": "root", 
                        "writeMode": "insert"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
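
After the job finishes, a row-count comparison confirms the copy; the mysql client calls below assume the client is installed locally and that both servers accept remote root logins:

# submit the job (the file name mysql2mysql.json is arbitrary)
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/mysql2mysql.json
# compare source and target row counts
mysql -h hadoop10 -uroot -p123456 -e "select count(*) from test1.user_info"
mysql -h hadoop12 -uroot -p123456 -e "select count(*) from test_1.user_info"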

03 mysql2hdfs

# Export query results from the MySQL database on hadoop10 into HDFS on hadoop10
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://hadoop10:3306/test1"],
                                "querySql": [
                                    "	select uid,case when char_length(nick_name)>13 
										then concat(SUBSTRING(nick_name,1,10),'...') else nick_name	end as nick_name
										from user_info
										where char_length(nick_name)>10;"
                                ]
                            }
                        ],
                        "password": "123456",
                        "username": "root",
                    }
                },
                "writer": {
                    "name": "hdfswriter", 
                    "parameter": {
                        "column": [
							{
                         		"name": "uid",
                         		"type": "INT"
                     		},
                     		{
                        		"name": "nick_name",
                         		"type": "VARCHAR"
                     		}
						], 
                        "compress": "", 
                        "defaultFS": "hdfs://hadoop10:9000", 
                        "fieldDelimiter": "\t", 
                        "fileName": "user_info.txt", 
                        "fileType": "text", 
                        "path": "/test_datax/mysql2hdfs", 
                        "writeMode": "append"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
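
hdfswriter expects the target directory to already exist, so create it before the first run; the commands below are a sketch assuming the HDFS client is configured on hadoop10:

# create the target directory, submit the job, then inspect the output
# (hdfswriter appends a random suffix to fileName, hence the wildcard)
hdfs dfs -mkdir -p /test_datax/mysql2hdfs
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/mysql2hdfs.json
hdfs dfs -cat /test_datax/mysql2hdfs/user_info.txt*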

04 mysql2ftp

# Export the MySQL table t_user on hadoop10 to a tab-delimited text file on hadoop12 over sftp
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": ["id","name","age"], 
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://hadoop10:3306/test1?characterEncoding=UTF-8"], 
                                "table": ["t_user"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root"
                    }
                }, 
                "writer": {
                    "name": "ftpwriter", 
                    "parameter": {
                        "connectPattern": "PASV", 
                        "dateFormat": "yyyy.MM.dd", 
                        "encoding": "utf-8", 
                        "fieldDelimiter": "\t", 
                        "fileFormat": "text", 
                        "fileName": "from_mysql", 
                        "header": ["id","name","age"], 
                        "host": "hadoop12", 
                        "nullFormat": "\\N", 
                        "password": "root", 
                        "path": "/opt/data", 
                        "port": "22", 
                        "protocol": "sftp", 
                        "timeout": "600000", 
                        "username": "root", 
                        "writeMode": "append"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
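
A sketch for running the job and checking the result on hadoop12; creating /opt/data up front is a precaution, and the output file name carries a suffix generated by ftpwriter:

ssh hadoop12 "mkdir -p /opt/data"
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/mysql2ftp.json
ssh hadoop12 "cat /opt/data/from_mysql*"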

05 ftp2hdfs (ftp to hive)

# Read a text file over sftp and write it into HDFS as comma-delimited text
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "ftpreader", 
                    "parameter": {
                        "column": [
                            { "index":0, "type":"long"},
                            { "index":1, "type":"string"},
                            { "index":2, "type":"string"},                                     	   						 { "index": 3, "type": "data":"format":"yyyy.MM.dd"}
                        ], 
                        "encoding": "UTF-8", 
                        "fieldDelimiter": "\t", 
                        "host": "hadoop10", 
                        "password": "root", 
                        "path": ["/opt/data/from_mysql"], 
                        "port": "22", 
                        "protocol": "sftp", 
                        "username": "root",
						"skipHeader":true,
						"nullFormat":"\\n"
                    }
                }, 
                "writer": {
                    "name": "hdfswriter", 
                    "parameter": {
                        "column": [
							{"name":"col1", "type":"INT"}
							{"name":"col2", "type":"string"}
							{"name":"col3", "type":"string"}
						], 
                        "defaultFS": "hdfs://hdfs-cluster", 
                        "fieldDelimiter": ",", 
                        "fileName": "from_ftp", 
                        "fileType": "text", 
                        "path": "/from_datax", 
                        "writeMode": "append"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
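
Since the heading mentions hive, a possible follow-up is to lay an external Hive table over the target directory; this DDL is a hypothetical sketch (the table name t_user_from_ftp is made up, and the delimiter matches the hdfswriter config above):

# create an external Hive table on top of the files written by the job
hive -e "create external table t_user_from_ftp (col1 int, col2 string, col3 string) row format delimited fields terminated by ',' location '/from_datax'"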

06 hdfs2mysql (hive to mysql)

# Read the comma-delimited files under /from_datax on the HA HDFS cluster and load them into the MySQL table t_user on hadoop12
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader", 
                    "parameter": {
                        "column": [
							{"index":0,"type":"long"},
							{"index":1,"type":"string"},
							{"index":2,"type":"string"}
						],
                        "defaultFS": "hdfs://hdfs-cluster", 
						"hadoopConfig":{
						"dfs.nameservices": "hdfs-cluster",
						"dfs.ha.namenodes.hdfs-cluster": "nn1,nn2",
						"dfs.namenode.rpc-address.hdfs-cluster.nn1": "hadoop11:9000",
						"dfs.namenode.rpc-address.hdfs-cluster.nn2": "hadoop12:9000",
						"dfs.client.failover.proxy.provider.hdfs-cluster": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
						},
                        "encoding": "UTF-8", 
                        "fieldDelimiter": ",", 
                        "fileType": "text", 
                        "path": "/from_datax/from_ftp*"
                    }
                }, 
                "writer": {
                    "name": "mysqlwriter", 
                    "parameter": {
                        "column": ["id","name","age"], 
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://hadoop12:3306/test_1?characterEncoding=UTF-8", 
                                "table": ["t_user"]
                            }
                        ], 
                        "password": "123456", 
                        "preSql": ["truncate table t_user"], 
                        "session": [], 
                        "username": "root", 
                        "writeMode": "insert"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
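
As with example 02, a row-count check on the target confirms the load; the mysql client call is an assumption about the environment:

# submit the job and verify that rows landed in the target table
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/hdfs2mysql.json
mysql -h hadoop12 -uroot -p123456 -e "select count(*) from test_1.t_user"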