DataX Data Synchronization Test

I. MySQL Full Data Synchronization

1. Create the MySQL Table

-- ambari03.devinkim.com:3306/bitestdb   root 123456
USE `bitestdb`;
DROP TABLE IF EXISTS `emp`;
CREATE TABLE `emp` (
  `id` int(11) DEFAULT NULL,
  `name` varchar(100) DEFAULT NULL,
  `deg` varchar(100) DEFAULT NULL,
  `salary` int(11) DEFAULT NULL,
  `dept` varchar(10) DEFAULT NULL,
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `is_delete` bigint(20) DEFAULT '1'
);

INSERT INTO `emp`(`id`,`name`,`deg`,`salary`,`dept`,`create_time`,`update_time`,`is_delete`) VALUES
(1201,'gopal','manager',50000,'TP','2018-06-17 18:54:32','2019-01-17 11:19:32',1),
(1202,'manishahello','Proof reader',50000,'TPP','2018-06-15 18:54:32','2018-06-17 18:54:32',0),
(1203,'khalillskjds','php dev',30000,'AC','2018-06-17 18:54:32','2019-03-14 09:18:27',1),
(1204,'prasanth_xxx','php dev',30000,'AC','2018-06-17 18:54:32','2019-04-07 09:09:24',1),
(1205,'kranthixxx','admin',20000,'TP','2018-06-17 18:54:32','2018-12-08 11:50:33',0),
(1206,'garry','manager',50000,'TPC','2018-12-10 21:41:09','2018-12-10 21:41:09',1),
(1207,'oliver','php dev',2000,'AC','2018-12-15 13:49:13','2018-12-15 13:49:13',1),
(1208,'hello','phpDev',200,'TP','2018-12-16 09:41:48','2018-12-16 09:41:48',1),
(1209,'ABC','HELLO',300,NULL,'2018-12-16 09:42:04','2018-12-16 09:42:24',1),
(1210,'HELLO','HELLO',5800,'TP','2019-01-24 09:02:43','2019-01-24 09:02:43',1),
(1211,'WORLD','TEST',8800,'AC','2019-01-24 09:03:15','2019-01-24 09:03:15',1),
(1212,'sdfs','sdfsdf',8500,'AC','2019-03-13 22:01:38','2019-03-13 22:01:38',1),
(1213,NULL,'sdfsdf',9800,'sdfsdf','2019-03-14 09:08:31','2019-03-14 09:08:54',1),
(1214,'xxx','sdfsdf',9500,NULL,'2019-03-14 09:13:32','2019-03-14 09:13:44',0),
(1215,'sdfsf','sdfsdfsdf',9870,'TP','2019-04-07 09:10:39','2019-04-07 09:11:18',0),
(1216,'hello','HELLO',5600,'AC','2019-04-07 09:37:05','2019-04-07 09:37:05',1),
(1217,'HELLO2','hello2',7800,'TP','2019-04-07 09:37:40','2019-04-07 09:38:17',1);

2. Create the Hive Table

sudo -u hdfs hdfs dfs -rmr /warehouse/tablespace/external/hive/whdb.db/emp/*
drop table if exists `whdb`.`emp`;
create external table `whdb`.`emp` (
    id int,
    name string,
    deg string,
    salary double,
    dept string,
    create_time timestamp,
    update_time timestamp,
    isdeleted string
) row format delimited fields terminated by '\u0001' STORED AS textfile;

3. Develop the DataX Job Configuration File

# Server: flink02.devinkim.com   Directory: /devinkim/softwares/datax/datax/job
sudo -u hdfs hdfs dfs -rmr /warehouse/tablespace/external/hive/whdb.db/emp/*
cd /devinkim/softwares/datax/datax/job
vim mysql2hive.json

Important: data that Hive stores on HDFS is delimited by '\001' by default (opening a data file in vim shows the fields separated by ^A), so the delimiter in the job configuration must be specified as "fieldDelimiter": "\u0001"!
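
After the job in step 4 has run, one way to confirm this delimiter in the generated files is to dump a few rows with control characters made visible (a quick sketch using the table path from this test; `cat -A` prints \001 as ^A):

# Dump the first rows of the emp data files and show control characters
sudo -u hdfs hdfs dfs -cat /warehouse/tablespace/external/hive/whdb.db/emp/* | head -n 3 | cat -A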

{
    "job": {
        "setting": {
            "speed": {
                 "channel":1
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "123456",
                        "connection": [
                            {
                                "querySql": [
                                    "select * from emp where id < 1208;"
                                ],
                                "jdbcUrl": [
                                    "jdbc:mysql://ambari03.devinkim.com:3306/bitestdb"
                                ]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://ambari01.devinkim.com:8020",
                        "fileType": "text",
                        "path": "/warehouse/tablespace/external/hive/whdb.db/emp",
                        "fileName": "emp",
                        "column": [
                            {
                                "name": "id",
                                "type": "INT"
                            },
                            {
                                "name": "name",
                                "type": "STRING"
                            },
                            {
                                "name": "deg",
                                "type": "STRING"
                            },
                            {
                                "name": "salary",
                                "type": "DOUBLE"
                            },
                            {
                                "name": "dept",
                                "type": "STRING"
                            },
                            {
                                "name": "create_time",
                                "type": "TIMESTAMP"
                            },
                            {
                                "name": "update_time",
                                "type": "TIMESTAMP"
                            },
                            {
                                "name": "isdeleted",
                                "type": "STRING"
                            }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": "\u0001"
                    }
                }
            }
        ]
    }
}

4. Run the DataX Job

cd /devinkim/softwares/datax/datax
python bin/datax.py  job/mysql2hive.json

5. Verify the Data

select * from `whdb`.`emp`;
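
The same check can also be run from the shell; the snippet below lists the files DataX wrote and runs the query non-interactively (a sketch based on the paths used in this test; the HDFS user may differ in your cluster):

# List the files DataX produced under the emp table directory
sudo -u hdfs hdfs dfs -ls /warehouse/tablespace/external/hive/whdb.db/emp/
# Query the table without opening an interactive Hive session
hive -e "select * from whdb.emp limit 10;"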

II. MySQL Incremental and Partitioned Data Synchronization

1. Create the MySQL Table

-- ambari03.devinkim.com:3306/bitestdb   root 123456
USE `bitestdb`;
DROP TABLE IF EXISTS `emp`;
CREATE TABLE `emp` (
  `id` int(11) DEFAULT NULL,
  `name` varchar(100) DEFAULT NULL,
  `deg` varchar(100) DEFAULT NULL,
  `salary` int(11) DEFAULT NULL,
  `dept` varchar(10) DEFAULT NULL,
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `is_delete` bigint(20) DEFAULT '1'
);

INSERT  INTO `emp` VALUES (1001,'gopal','manager',50000,'TP','2020-09-14 13:51:57','2020-09-14 13:52:42',1);
INSERT  INTO `emp` VALUES (1002,'hdfs','hadoop',20000,'TP','2020-09-14 13:51:57','2020-09-14 13:52:42',1);

2. Create the Hive Table

-- sudo -u hdfs hdfs dfs -rmr /warehouse/tablespace/external/hive/whdb.db/emp/*

drop table if exists `whdb`.`emp`;
create external table `whdb`.`emp` (
    id int,
    name string,
    deg string,
    salary int,
    dept string,
    create_time timestamp,
    update_time timestamp,
    isdeleted bigint 
) 
partitioned by (day string) 
row format delimited fields terminated by '\u0001' 
stored as orc 
location 'hdfs://ambari01.devinkim.com:8020/warehouse/tablespace/external/hive/whdb.db/emp';
show create table `whdb`.`emp`;

3. Develop the DataX Job Configuration File

increment-partition.json

{
  "job": {
    "setting": {
      "speed": {
        "channel": 3,
        "byte": 1048576
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0.02
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "root",
            "password": "123456",
            "column": [
              "`id`",
              "`name`",
              "`deg`",
              "`salary`",
              "`dept`",
              "`create_time`",
              "`update_time`",
              "`is_delete`"
            ],
            "where": "`create_time` >= '${start_time}' AND `create_time` < '${end_time}'",
            "splitPk": "",
            "connection": [
              {
                "table": [
                  "emp"
                ],
                "jdbcUrl": [
                  "jdbc:mysql://ambari03.devinkim.com:3306/bitestdb"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://ambari01.devinkim.com:8020",
            "fileType": "orc",
            "path": "/warehouse/tablespace/external/hive/whdb.db/emp/${day}",
            "fileName": "ambari03-bitestdb-emp",
            "writeMode": "append",
            "fieldDelimiter": "\u0001",
            "column": [
              {
                "name": "id",
                "type": "int"
              },
              {
                "name": "name",
                "type": "string"
              },
              {
                "name": "deg",
                "type": "string"
              },
              {
                "name": "salary",
                "type": "double"
              },
              {
                "name": "dept",
                "type": "string"
              },
              {
                "name": "create_time",
                "type": "timestamp"
              },
              {
                "name": "update_time",
                "type": "timestamp"
              },
              {
                "name": "isdeleted",
                "type": "string"
              }
            ]
          }
        }
      }
    ]
  }
}

Note: two ways to write the WHERE condition

-- 1. Use the database's current time (not recommended)
`create_time` >= DATE_FORMAT( DATE_SUB(CURDATE(), INTERVAL 1 DAY), '%Y-%m-%d 00:00:00') AND `create_time` < DATE_FORMAT(NOW(),'%Y-%m-%d 00:00:00')
-- 2. Pass the start and end times dynamically as DataX parameters (strongly recommended)
`create_time` >= '${start_time}' AND `create_time` < '${end_time}'

4. Run the DataX Job

# sudo -u hdfs hdfs dfs -rmr /warehouse/tablespace/external/hive/whdb.db/emp/*
# sudo -u hdfs hdfs dfs -mkdir -p /warehouse/tablespace/external/hive/whdb.db/emp/$(date '+%Y-%m-%d' --date="-1 day")

python bin/datax.py  job/increment-partition.json -p "-Dday='$(date '+%Y-%m-%d' --date="-1 day")' -Dstart_time='$(date '+%Y-%m-%d 00:00:00' --date="-1 day")' -Dend_time='$(date '+%Y-%m-%d 00:00:00')'"

5. Load the Data into the Partition

hivevariable.hql

use whdb;
load data inpath '/warehouse/tablespace/external/hive/whdb.db/emp/${day}' into table `whdb`.`emp` partition (day='${day}');
select * from emp where day = '${day}';

Pass the parameter from the shell and run hivevariable.hql

hive --hivevar day=$(date '+%Y-%m-%d' --date="-1 day") -f hivevariable.hql
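
To run the incremental extraction and the partition load together every day, one option is a small wrapper script driven by cron (a sketch, not part of the original setup: the script name, cron schedule, log path, and the location of hivevariable.hql are assumptions; the DataX and job paths are the ones used above):

#!/bin/bash
# sync_emp_daily.sh -- run the DataX incremental job for yesterday, then load the Hive partition
set -e

DAY=$(date '+%Y-%m-%d' --date="-1 day")
START_TIME=$(date '+%Y-%m-%d 00:00:00' --date="-1 day")
END_TIME=$(date '+%Y-%m-%d 00:00:00')

cd /devinkim/softwares/datax/datax

# hdfswriter expects the target directory to exist before writing
sudo -u hdfs hdfs dfs -mkdir -p /warehouse/tablespace/external/hive/whdb.db/emp/${DAY}

python bin/datax.py job/increment-partition.json \
  -p "-Dday='${DAY}' -Dstart_time='${START_TIME}' -Dend_time='${END_TIME}'"

# assuming hivevariable.hql is kept in the job directory
hive --hivevar day=${DAY} -f job/hivevariable.hql

# Example crontab entry (assumption): run the sync at 01:30 every day
# 30 1 * * * /devinkim/softwares/datax/datax/job/sync_emp_daily.sh >> /tmp/sync_emp_daily.log 2>&1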

III. MySQL Full Synchronization (DataX-Web)

http://flink01:9527/index.html

Username: admin

Password: 123456

1. Create the MySQL Data Source

(screenshot of the MySQL data source configuration)


2. Create the Hive Data Source

(screenshot of the Hive data source configuration)

3. Build the Task

First, create the table in Hive:

-- sudo -u hdfs hdfs dfs -rmr /yanxu-jin/datax/suv_complain_file/*
USE `whdb`;
DROP TABLE IF EXISTS `whdb`.`t_yyc_ts_info`;
CREATE EXTERNAL TABLE `t_yyc_ts_info`(                      
  `com_id` string COMMENT 'complain投诉编号',          
  `com_agent_name` string COMMENT 'complain经销商名称  周口京彩润,北京鹏元,长沙脸谱,南阳京润, 昆明孝尊 ',  
  `com_type_id` string COMMENT 'complain投诉类型id',
  `com_type_name` string COMMENT 'complain投诉类型名称 1001 产品质量 1002 售后投诉 1003 配件投诉 1004销售投诉 1005其它类投诉',  
  `com_car_type` string COMMENT 'complain投诉车型 BJ40L,BJ40 PLUS,B80C,B40皮卡,B40柴油版,B40V,B40L PLUS-柴油版,BJ80,BJ80汽油版,BJ40环塔冠军版,BJ40城市猎人版,BJ40PLUS域 ',  
  `com_info` string COMMENT 'complain投诉内容',        
  `com_time` string COMMENT 'complain投诉时间',        
  `com_dt_limit` string COMMENT 'complain最晚投诉处理期限 ',  
  `com_status` string COMMENT 'complain投诉处理状态,10待处理,11已处理',  
  `com_do_time` string COMMENT 'complain投诉处理时间',   
  `com_do` int COMMENT 'complain投诉处理周期  多少天'
) row format delimited fields terminated by '\u0001' 
  STORED AS TEXTFILE
  LOCATION '/yanxu-jin/datax/suv_complain_file/';
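
Before running the DataX-Web task, it can help to make sure the HDFS target directory from the LOCATION clause above exists and is empty (a sketch; adjust the HDFS user to your environment):

# Create the target directory if it is missing and confirm it is empty
sudo -u hdfs hdfs dfs -mkdir -p /yanxu-jin/datax/suv_complain_file/
sudo -u hdfs hdfs dfs -ls /yanxu-jin/datax/suv_complain_file/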

(screenshots of the task-building steps in DataX-Web)

4. Add the Task in Task Management

(screenshot of adding the task in task management)

5. Run the Task and Check the Logs

sudo -u hdfs hdfs dfs -rmr /yanxu-jin/datax/suv_complain_file/*

(screenshot of the task run logs)

IV. Oracle Full Synchronization (DataX-Web)

1. Create the Hive Table

DROP TABLE IF EXISTS `whdb`.`t_user_reg`;
CREATE TABLE `whdb`.`t_user_reg` (
  `cif_no` string  COMMENT 'customer number',
  `cif_type` string  COMMENT 'customer type: 10 consumer, 11 distributor, 12 reseller, 13 internal',
  `cif_type_name` string  COMMENT 'customer type name: 10 consumer, 11 distributor, 12 reseller, 13 internal',
  `cif_account` string  COMMENT 'account number',
  `cus_tel` string  COMMENT 'customer mobile number',
  `active_flag` string  COMMENT 'activation flag: 1 activated, 0 not activated',
  `cif_nickname` string  COMMENT 'nickname',
  `reg_origin` string  COMMENT 'registration source: 0 mobile, 1 PC, 2 counter',
  `cif_origin` string  COMMENT 'customer source',
  `account_flag` string  COMMENT 'account-opening flag: 0 not opened, 1 opened',
  `area_code` bigint  COMMENT 'administrative region code',
  `email` string  COMMENT 'customer email',
  `reg_time` timestamp  COMMENT 'registration time',
  `occ_time` timestamp   COMMENT 'record timestamp'
) row format delimited fields terminated by '\u0001' STORED AS orc 
LOCATION 'hdfs://ambari01.devinkim.com:8020/warehouse/tablespace/managed/hive/whdb.db/t_user_reg';

2. Develop the DataX Job Configuration File

{
  "job": {
    "setting": {
      "speed": {
        "channel": 3,
        "byte": 1048576
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0.02
      }
    },
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "username": "EnJGXrfLkVvuqUICQB3tGg==",
            "password": "E8gaBXRn6CCJLyyE1uIA6w==",
            "column": [
              "\"CIF_NO\"",
              "\"CIF_TYPE\"",
              "\"CIF_TYPE_NAME\"",
              "\"CIF_ACCOUNT\"",
              "\"CUS_TEL\"",
              "\"ACTIVE_FLAG\"",
              "\"CIF_NICKNAME\"",
              "\"REG_ORIGIN\"",
              "\"CIF_ORIGIN\"",
              "\"ACCOUNT_FLAG\"",
              "\"AREA_CODE\"",
              "\"EMAIL\"",
              "\"REG_TIME\"",
              "\"OCC_TIME\""
            ],
            "splitPk": "",
            "connection": [
              {
                "table": [
                  "T_USER_REG"
                ],
                "jdbcUrl": [
                  " jdbc:oracle:thin:@10.90.21.26:1521:orcl"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://ambari01.devinkim.com:8020",
            "fileType": "orc",
            "path": "/warehouse/tablespace/managed/hive/whdb.db/t_user_reg",
            "fileName": "file_user_reg",
            "writeMode": "append",
            "fieldDelimiter": "\u0001",
            "column": [
              {
                "name": "cif_no",
                "type": "string"
              },
              {
                "name": "cif_type",
                "type": "string"
              },
              {
                "name": "cif_type_name",
                "type": "string"
              },
              {
                "name": "cif_account",
                "type": "string"
              },
              {
                "name": "cus_tel",
                "type": "string"
              },
              {
                "name": "active_flag",
                "type": "string"
              },
              {
                "name": "cif_nickname",
                "type": "string"
              },
              {
                "name": "reg_origin",
                "type": "string"
              },
              {
                "name": "cif_origin",
                "type": "string"
              },
              {
                "name": "account_flag",
                "type": "string"
              },
              {
                "name": "area_code",
                "type": "bigint"
              },
              {
                "name": "email",
                "type": "string"
              },
              {
                "name": "reg_time",
                "type": "timestamp"
              },
              {
                "name": "occ_time",
                "type": "timestamp"
              }
            ]
          }
        }
      }
    ]
  }
}
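
Before registering this configuration in DataX-Web, a quick syntax check of the JSON can catch typos early (a sketch; the file name oracle2hive.json is an assumption):

# Validate the job file's JSON syntax; prints the parsed document or an error
python -m json.tool oracle2hive.json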

3. Manually Load the Data

load data inpath '/warehouse/tablespace/managed/hive/whdb.db/t_user_reg/*' into table `whdb`.`t_user_reg`;
select count(*) from `whdb`.`t_user_reg`;
select * from `whdb`.`t_user_reg` limit 10;