1、工具准备
JDK(1.8 以上,推荐 1.8)
Python(2,3 版本都可以)
#下载jdk
#wget https://<JDK下载地址>/jdk-8u331-linux-x64.tar.gz  (请替换为实际的下载地址)
mkdir -p /usr/local/java # tar -C 要求目标目录已存在
tar -zxvf jdk-8u331-linux-x64.tar.gz -C /usr/local/java
#配置jdk (定界符 END 必须加引号, 否则 ${JAVA_HOME}、$PATH 等变量会在写入时被提前展开)
cat <<'END' >> /etc/profile
export JAVA_HOME=/usr/local/java/jdk1.8.0_331
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:$CLASSPATH
export JAVA_PATH=${JAVA_HOME}/bin:${JRE_HOME}/bin
export PATH=$PATH:${JAVA_PATH}
END
#生效
source /etc/profile
java -version
#安装 DataX 软件
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
tar -zxvf datax.tar.gz -C /usr/local/
rm -rf /usr/local/datax/plugin/*/._* # 需要删除隐藏文件 (重要)
2、配置验证
cd /usr/local/datax/bin
python datax.py ../job/job.json # 用来验证是否安装成功
2022-07-11 17:46:28.906 [job-0] INFO JobContainer -
任务启动时刻 : 2022-07-11 17:46:18
任务结束时刻 : 2022-07-11 17:46:28
任务总计耗时 : 10s
任务平均流量 : 253.91KB/s
记录写入速度 : 10000rec/s
读出记录总数 : 100000
读写失败总数 : 0
#查看 streamreader --> streamwriter 的模板:
python /usr/local/datax/bin/datax.py -r streamreader -w streamwriter
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
Please refer to the streamreader document:
https://github.com/alibaba/DataX/blob/master/streamreader/doc/streamreader.md
Please refer to the streamwriter document:
https://github.com/alibaba/DataX/blob/master/streamwriter/doc/streamwriter.md
Please save the following configuration as a json file and use
python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json
to run the job.
{
"job": {
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"column": [],
"sliceRecordCount": ""
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": ""
}
}
}
}
3、模板解释
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader", # 读取端
"parameter": {
"column": ["*"], # 需要同步的列 (* 表示所有的列)
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8"
], # 连接信息
"table": ["test"] # 连接表
}
],
"password": "pwd@123", # 连接密码
"username": "root", # 连接用户
"where": "" # 描述筛选条件
}
},
"writer": {
"name": "mysqlwriter", # 写入端
"parameter": {
"column": ["*"], # 需要同步的列
"connection": [
{
"jdbcUrl": "jdbc:mysql://127.0.0.1:3306/deltest?useUnicode=true&characterEncoding=utf8", # 连接信息
"table": [] # 连接表 (这个表一定要存在)
}
],
"password": "pwd@123", # 连接密码
"preSql": ["truncate test"], # 同步前要做的事
"session": ["set session sql_mode='ANSI'"],
"username": "root", # 连接用户
"writeMode": "insert" # 操作类型
}
}
}
],
"setting": {
"speed": {
"channel": "5" # 指定并发数
}
}
}
}
4、正式使用模板
vim install.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "pwd@123",
"column": ["*"],
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://127.0.0.1:3306/test"
],
"table": ["test"]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["*"],
"connection": [
{
"jdbcUrl": "jdbc:mysql://127.0.0.1:3306/test_del?useUnicode=true&characterEncoding=utf8",
"table": ["test"]
}
],
"password": "pwd@123",
"preSql": [
""
],
"session": [
"set session sql_mode='ANSI'"
],
"username": "root",
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {
"channel": "5"
}
}
}
}
5、同步测试
#同步的数据库和表自行创建
python /usr/local/datax/bin/datax.py install.json
2022-07-11 18:02:59.680 [job-0] INFO JobContainer -
任务启动时刻 : 2022-07-11 18:02:48
任务结束时刻 : 2022-07-11 18:02:59
任务总计耗时 : 10s
任务平均流量 : 1B/s
记录写入速度 : 0rec/s
读出记录总数 : 2
读写失败总数 : 0