JSON configuration of a DataX job
- content (the main body of a single DataX job's configuration)
  - reader (reader configuration)
    - name (reader plugin name)
    - parameter (parameters specific to the reader)
  - writer (writer configuration)
    - name (writer plugin name)
    - parameter (parameters specific to the writer)
- setting (settings for the single DataX job)
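A minimal skeleton showing how these keys nest (the plugin names below are placeholders, not real plugins):
{
    "job": {
        "content": [
            {
                "reader": { "name": "somereader", "parameter": {} },
                "writer": { "name": "somewriter", "parameter": {} }
            }
        ],
        "setting": {
            "speed": { "channel": "1" }
        }
    }
}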
### Download
http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
Source repository: https://github.com/alibaba/DataX
Open an sftp session on the hadoop12 node with: sftp root@hadoop12
Whether a command acts on the local or the remote side depends on whether it carries the l prefix
get a.txt    download a.txt from hadoop12 to the local machine
put a.txt    upload the local a.txt to hadoop12
lpwd         print the local working directory (the l prefix means local)
exit         log out
sftp uses port 22
# Passwordless SSH from hadoop10 to hadoop12: run ll ~/.ssh (an existing id_rsa.pub shows a key pair was already generated), then ssh-copy-id hadoop12 is enough
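A short example session (a.txt is just an illustrative file name):
[root@hadoop10 ~]# sftp root@hadoop12
sftp> lpwd
Local working directory: /root
sftp> put a.txt
sftp> get a.txt
sftp> exit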
Common DataX commands
# Run the official sample job to verify that DataX works
[root@hadoop10 conf]# python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/job.json
# Print a config template (frequently used)
python datax.py -r streamreader -w streamwriter
# Run a job (a finished JSON file)
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/test_job.json
# Redirect the mysqlreader + hdfswriter JSON template into test_job.json
python /opt/installs/datax/bin/datax.py -r mysqlreader -w hdfswriter > /opt/installs/datax/job/test_job.json
Starting DataX-Web
# First switch to the DataX-Web project directory:
cd /opt/installs/datax-web-2.1.2/
# Then run the startup script in the bin directory:
./bin/start-all.sh
# To stop the services, run the stop script in the bin directory:
./bin/stop-all.sh
# Web UI address
http://hadoop13:9527/index.html
Username: admin
Password: 123456
01 Print your own name 10 times
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "streamreader",
                    "parameter": {
                        "column": [
                            {
                                "type": "string",
                                "value": "jiejunchong"
                            },
                            {
                                "type": "long",
                                "value": 23
                            }
                        ],
                        "sliceRecordCount": 20
                    }
                },
                "writer": {
                    "name": "streamwriter",
                    "parameter": {
                        "encoding": "utf-8",
                        "print": true
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
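To run this job, save the JSON and pass it to datax.py (the file name stream_job.json is just an example). With channel = 1, streamwriter prints sliceRecordCount records, so this config prints 20 lines of the form jiejunchong<tab>23; set sliceRecordCount to 10 to match the exercise title.
python /opt/installs/datax/bin/datax.py /opt/installs/datax/job/stream_job.json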
02 mysql2mysql
# Import a MySQL table on hadoop10 into a MySQL table on hadoop12
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": ["id","uid","nick_name","achievement","level","job","register_time"],
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://hadoop10:3306/test1"],
                                "table": ["user_info"]
                            }
                        ],
                        "password": "123456",
                        "username": "root"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "column": ["id","uid","nick_name","achievement","level","job","register_time"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://hadoop12:3306/test_1?characterEncoding=UTF-8",
                                "table": ["user_info"]
                            }
                        ],
                        "password": "123456",
                        "preSql": ["truncate table user_info"],
                        "session": [],
                        "username": "root",
                        "writeMode": "insert"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
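A note on the writer block above: preSql runs once before loading, so truncating the target first makes reruns idempotent. mysqlwriter also supports writeMode "replace" (REPLACE INTO, which needs a primary or unique key on the target table); in that case the truncate can be dropped. A sketch of just the two changed keys:
"preSql": [],
"writeMode": "replace"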
03 mysql2hdfs
# Import a MySQL table on hadoop10 into HDFS on hadoop10
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://hadoop10:3306/test1"],
                                "querySql": [
                                    "select uid, case when char_length(nick_name) > 13 then concat(SUBSTRING(nick_name,1,10),'...') else nick_name end as nick_name from user_info where char_length(nick_name) > 10"
                                ]
                            }
                        ],
                        "password": "123456",
                        "username": "root"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            {
                                "name": "uid",
                                "type": "INT"
                            },
                            {
                                "name": "nick_name",
                                "type": "VARCHAR"
                            }
                        ],
                        "compress": "",
                        "defaultFS": "hdfs://hadoop10:9000",
                        "fieldDelimiter": "\t",
                        "fileName": "user_info.txt",
                        "fileType": "text",
                        "path": "/test_datax/mysql2hdfs",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
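hdfswriter expects the target HDFS directory to exist before the job runs, and it writes a file named fileName plus a random suffix under that path. Create the directory once up front:
hdfs dfs -mkdir -p /test_datax/mysql2hdfs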
04 mysql2ftp
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": ["id","name","age"],
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://hadoop10:3306/test1?characterEncoding=UTF-8"],
                                "table": ["t_user"]
                            }
                        ],
                        "password": "123456",
                        "username": "root"
                    }
                },
                "writer": {
                    "name": "ftpwriter",
                    "parameter": {
                        "connectPattern": "PASV",
                        "dateFormat": "yyyy.MM.dd",
                        "encoding": "utf-8",
                        "fieldDelimiter": "\t",
                        "fileFormat": "text",
                        "fileName": "from_mysql",
                        "header": ["id","name","age"],
                        "host": "hadoop12",
                        "nullFormat": "\\N",
                        "password": "root",
                        "path": "/opt/data",
                        "port": "22",
                        "protocol": "sftp",
                        "timeout": "600000",
                        "username": "root",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
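Since protocol is sftp here, the transfer runs over SSH on port 22; connectPattern (PASV/PORT) only matters for plain FTP and should have no effect in this mode. After the job runs, the output can be checked from the sftp session shown earlier:
sftp root@hadoop12
sftp> ls /opt/data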
05 ftp2hdfs (ftp → hive)
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "ftpreader",
                    "parameter": {
                        "column": [
                            { "index": 0, "type": "long" },
                            { "index": 1, "type": "string" },
                            { "index": 2, "type": "string" },
                            { "index": 3, "type": "date", "format": "yyyy.MM.dd" }
                        ],
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\t",
                        "host": "hadoop10",
                        "password": "root",
                        "path": ["/opt/data/from_mysql"],
                        "port": "22",
                        "protocol": "sftp",
                        "username": "root",
                        "skipHeader": true,
                        "nullFormat": "\\N"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            { "name": "col1", "type": "INT" },
                            { "name": "col2", "type": "STRING" },
                            { "name": "col3", "type": "STRING" }
                        ],
                        "defaultFS": "hdfs://hdfs-cluster",
                        "fieldDelimiter": ",",
                        "fileName": "from_ftp",
                        "fileType": "text",
                        "path": "/from_datax",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
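Two consistency points, plus a quick check. The from_mysql file produced by job 04 carries three fields (id, name, age), so the index-3 date column in the reader only applies if the source file really has a fourth date-formatted field; ftpreader's column list and hdfswriter's column list must both match the actual field count. And as in job 03, the target directory must exist first:
hdfs dfs -mkdir -p /from_datax
hdfs dfs -cat /from_datax/from_ftp*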
06 hdfs2mysql (hive → mysql)
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "column": [
                            { "index": 0, "type": "long" },
                            { "index": 1, "type": "string" },
                            { "index": 2, "type": "string" }
                        ],
                        "defaultFS": "hdfs://hdfs-cluster",
                        "hadoopConfig": {
                            "dfs.nameservices": "hdfs-cluster",
                            "dfs.ha.namenodes.hdfs-cluster": "nn1,nn2",
                            "dfs.namenode.rpc-address.hdfs-cluster.nn1": "hadoop11:9000",
                            "dfs.namenode.rpc-address.hdfs-cluster.nn2": "hadoop12:9000",
                            "dfs.client.failover.proxy.provider.hdfs-cluster": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
                        },
                        "encoding": "UTF-8",
                        "fieldDelimiter": ",",
                        "fileType": "text",
                        "path": "/from_datax/from_ftp*"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "column": ["id","name","age"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://hadoop12:3306/test_1?characterEncoding=UTF-8",
                                "table": ["t_user"]
                            }
                        ],
                        "password": "123456",
                        "preSql": ["truncate table t_user"],
                        "session": [],
                        "username": "root",
                        "writeMode": "insert"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
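Because defaultFS points at the nameservice hdfs-cluster rather than a single NameNode, hadoopConfig must carry the HA client settings (the nameservice, its NameNodes, their RPC addresses, and the failover proxy provider), the same keys found in the cluster's hdfs-site.xml. A quick way to confirm the load, using the table and credentials configured above:
mysql -h hadoop12 -uroot -p123456 -e "select count(*) from test_1.t_user"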