DataX 在 Linux 上的安装部署与测试方案

本文介绍了在 Linux 环境下安装 DataX 3.0 的步骤,并提供了 MySQL 到 Hive 以及 Hive 到 MySQL 的数据迁移测试过程。首先确保已安装 Hadoop、Hive、JDK 和 Python 等依赖,然后从指定地址下载 DataX 并解压。测试包括编写 MySQL 和 Hive 的建表脚本及迁移脚本,将 JSON 脚本放在 DataX 的 bin 目录下,通过运行 python datax.py 命令执行迁移任务。迁移过程中需要注意两端数据类型的兼容性问题。
摘要由CSDN通过智能技术生成
  1. 环境准备

Hadoop2.7.3

Hive1.2.1

JDK1.8

Python2.7(CentOS 系统默认自带)

Mysql5.7

dataX3.0

下载地址:http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

  2. 测试脚本准备
    1. MySQL 建表脚本与数据脚本

-- MySQL-side dim_area table used as the source of the DataX migration test.
-- Written as MySQL DDL: MySQL has no STRING type and no
-- ROW FORMAT DELIMITED ... clause (those are Hive constructs), so the name
-- column is a VARCHAR. utf8mb4 is set explicitly because the rows contain
-- Chinese text and the MySQL 5.7 server default character set is latin1.
DROP TABLE IF EXISTS dim_area;

CREATE TABLE IF NOT EXISTS dim_area (
    id        BIGINT       COMMENT '',
    name      VARCHAR(100) COMMENT '地区名称',
    parent_id BIGINT       COMMENT ''   -- nullable: the seed data inserts NULL here for some rows
) DEFAULT CHARSET = utf8mb4 COMMENT = '';

 

-- Seed rows for dim_area, loaded with a single multi-row INSERT.
-- Each tuple is (id, name, parent_id); rows with a NULL parent_id have no parent row.
INSERT INTO dim_area (id, name, parent_id)
VALUES
    (110000, '北京市', null),
    (110100, '北京市', 110000),
    (110101, '东城区', 110100),
    (110102, '西城区', 110100),
    (110103, '朝阳区', 110100),
    (110104, '丰台区', 110100),
    (110105, '石景山区', 110100),
    (110106, '海淀区', 110100),
    (110107, '门头沟区', 110100),
    (110108, '房山区', 110100),
    (110109, '通州区', 110100),
    (110110, '顺义区', 110100),
    (110111, '昌平区', 110100),
    (110112, '大兴区', 110100),
    (110113, '怀柔区', 110100),
    (110114, '平谷区', 110100),
    (110115, '密云县', 110100),
    (110116, '延庆县', 110100),
    (120000, '天津市', null),
    (120100, '天津市', 120000),
    (120101, '和平区', 120100),
    (120102, '河东区', 120100),
    (120103, '河西区', 120100),
    (120104, '南开区', 120100),
    (120105, '河北区', 120100),
    (120106, '红桥区', 120100),
    (120107, '滨海新区', 120100),
    (120108, '东丽区', 120100),
    (120109, '西青区', 120100),
    (120110, '津南区', 120100),
    (120111, '北辰区', 120100),
    (120112, '武清区', 120100),
    (120113, '宝坻区', 120100),
    (120114, '宁河县', 120100),
    (120115, '静海县', 120100),
    (120116, '蓟县', 120100),
    (130000, '河北省', null),
    (130100, '石家庄市', 130000),
    (130102, '长安区', 130100),
    (130103, '桥东区', 130100),
    (130104, '桥西区', 130100),
    (130105, '新华区', 130100),
    (130107, '井陉矿区', 130100),
    (130108, '裕华区', 130100),
    (130121, '井陉县', 130100),
    (130123, '正定县', 130100),
    (130124, '栾城县', 130100),
    (130125, '行唐县', 130100),
    (130126, '灵寿县', 130100),
    (130127, '高邑县', 130100);

    2. Hive 的建表语句

-- Hive-side dim_area table: same three columns as the MySQL table,
-- with row fields delimited by a tab character.
DROP TABLE IF EXISTS dim_area;

CREATE TABLE IF NOT EXISTS dim_area (
    id        BIGINT COMMENT '',
    name      STRING COMMENT '地区名称',
    parent_id BIGINT COMMENT ''
)
COMMENT ''
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

    3. MySQL 到 Hive 的迁移 JSON 脚本(dim_area2.json)

{

    "job": {

        "setting": {

            "speed": {

                "channel": 10

            }

        },

        "content": [

            {

                "reader": {

                    "name": "mysqlreader",

                    "parameter": {

                        "username": "root",

                        "password": "root",

                        "connection": [

                            {

                                "querySql": [

                                    "SELECT id, name, parent_id FROM dim_area;"

                                ],

                                "jdbcUrl": [

                                    "jdbc:mysql://192.168.10.107:3306/zmdwdb"

                                ]

                            }

                        ]

                    }

                },

                "writer": {

                    "name": "hdfswriter",

                    "parameter": {

       

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值