使用sqoop api完成mysql到hadoop的导入


程序目的:将已有的mysql数据导入到hdfs中,使用java编程的方式减少安装sqoop本地环境的麻烦。

数据准备阶段
-- Test database and table holding synthetic voting data that will be
-- imported into HDFS via the Sqoop Java API below.
CREATE DATABASE IF NOT EXISTS test;
USE test;

-- vote_record: one row per vote cast by a (randomly generated) user.
CREATE TABLE IF NOT EXISTS `vote_record` (
  `id`          INT(11)     NOT NULL AUTO_INCREMENT, -- surrogate PK; Sqoop uses it as the default split column
  `user_id`     VARCHAR(20) NOT NULL,                -- random 20-char identifier produced by rand_string()
  `vote_id`     INT(11)     NOT NULL,
  `group_id`    INT(11)     NOT NULL,
  `create_time` DATETIME    NOT NULL,
  PRIMARY KEY (`id`),
  -- NOTE: InnoDB does not support HASH indexes; a "USING HASH" clause is
  -- silently converted to BTREE. Declare BTREE so the DDL states what the
  -- engine actually builds.
  KEY `index_user_id` (`user_id`) USING BTREE
)
  ENGINE = INNODB
  AUTO_INCREMENT = 1
  -- utf8mb4 instead of the legacy 3-byte "utf8" alias, which cannot store
  -- supplementary characters (e.g. emoji).
  DEFAULT CHARSET = utf8mb4;

-- rand_string(n): returns a random alphanumeric string of length n.
-- Declared NO SQL (it executes no SQL statements, only expressions) so it
-- can be created under binary logging WITHOUT setting the global
-- log_bin_trust_function_creators flag mentioned later in this article.
CREATE FUNCTION `rand_string`(n INT)
  RETURNS VARCHAR(255) CHARSET latin1
  NO SQL
  BEGIN
    DECLARE chars_str VARCHAR(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    DECLARE return_str VARCHAR(255) DEFAULT '';
    DECLARE i INT DEFAULT 0;
    WHILE i < n DO
      -- Pick one random character per iteration. The upper bound is derived
      -- from the alphabet length instead of the former hard-coded 62, so the
      -- alphabet can be changed without touching this line.
      SET return_str = CONCAT(return_str,
          SUBSTRING(chars_str, FLOOR(1 + RAND() * CHAR_LENGTH(chars_str)), 1));
      SET i = i + 1;
    END WHILE;
    RETURN return_str;
  END;

-- add_vote_record(n): bulk-loads n synthetic rows into vote_record.
-- The original committed implicitly after every single INSERT (autocommit),
-- which makes a 1,000,000-row load extremely slow on InnoDB. Commit in
-- batches of 10000 instead; the interface (IN n INT) is unchanged.
CREATE PROCEDURE `add_vote_record`(IN n INT)
  BEGIN
    DECLARE i INT DEFAULT 1;
    SET autocommit = 0;  -- group inserts into explicit transactions
    WHILE (i <= n) DO
      INSERT INTO vote_record (user_id, vote_id, group_id, create_time)
      VALUES (rand_string(20), FLOOR(RAND() * 1000), FLOOR(RAND() * 100), now());
      IF i MOD 10000 = 0 THEN
        COMMIT;          -- flush a full batch
      END IF;
      SET i = i + 1;
    END WHILE;
    COMMIT;              -- flush the final, possibly partial, batch
    SET autocommit = 1;  -- restore the session default
  END;

-- Populate one million synthetic rows; this is a long-running call.
CALL add_vote_record(1000000);
maven依赖
    <!-- Dependencies for driving Sqoop 1.4.x programmatically.
         NOTE(review): the Hadoop artifact versions (2.9.0) must be
         compatible with the Hadoop version running on the cluster, and the
         sqoop classifier (hadoop260) must match the Hadoop 2.x line. -->
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <!-- JDBC driver for the source MySQL database; 5.1.x matches the
             com.mysql.jdbc.Driver class name used in the code below. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
        </dependency>
        <!-- Sqoop itself, built against Hadoop 2.6 (classifier hadoop260). -->
        <dependency>
            <groupId>org.apache.sqoop</groupId>
            <artifactId>sqoop</artifactId>
            <version>1.4.7</version>
            <classifier>hadoop260</classifier>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.9.0</version>
        </dependency>
        <!-- Needed for Sqoop's MapReduce-based import job. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>2.9.0</version>
        </dependency>
        <!-- Sqoop serializes records via Avro. -->
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.8.2</version>
        </dependency>
    </dependencies>
执行代码
package cn.hackcoder;

import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.Sqoop;
import org.apache.sqoop.tool.SqoopTool;
import org.apache.sqoop.util.OptionsFileUtil;

/**
 * Runs Sqoop tools programmatically (list-tables and import) against a local
 * MySQL instance and HDFS, so no standalone Sqoop installation is required.
 *
 * Created by linzhichao on 2018/5/15.
 */
public class SqoopUtils {

    /**
     * Imports the {@code test.vote_record} table from MySQL into HDFS at
     * {@code /user/root/import_vote_record} using a single mapper ({@code -m 1}).
     *
     * @return the Sqoop exit code (0 on success, non-zero on failure)
     * @throws Exception if argument expansion or tool setup fails
     */
    private static int importDataFromMysql() throws Exception {
        // SECURITY NOTE(review): credentials are hard-coded in source; move
        // them to a secured Sqoop options file or external configuration
        // before using this beyond a local test.
        String[] args = new String[]{
                "--connect", "jdbc:mysql://127.0.0.1:3306/test",
                "--driver", "com.mysql.jdbc.Driver",
                "-username", "root",
                "-password", "system",
                "--table", "vote_record",
                "-m", "1",
                "--target-dir", "/user/root/import_vote_record"
        };

        String[] expandArguments = OptionsFileUtil.expandArguments(args);

        SqoopTool tool = SqoopTool.getTool("import");
        if (null == tool) {
            // Fail fast instead of a NullPointerException inside Sqoop.
            System.err.println("No such sqoop tool: import See 'sqoop help'.");
            return 1;
        }

        Configuration conf = new Configuration();
        // fs.defaultFS supersedes the deprecated fs.default.name key in
        // Hadoop 2.x; set the HDFS NameNode address here.
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
        Configuration loadPlugins = SqoopTool.loadPlugins(conf);

        Sqoop sqoop = new Sqoop((com.cloudera.sqoop.tool.SqoopTool) tool, loadPlugins);
        return Sqoop.runSqoop(sqoop, expandArguments);
    }

    /**
     * Lists the tables in the {@code test} database via Sqoop's
     * {@code list-tables} tool.
     *
     * @return the Sqoop exit code (0 on success, non-zero on failure)
     * @throws Exception if argument expansion or tool setup fails
     */
    private static int listTablesFromMysql() throws Exception {
        String[] args = new String[]{
                "--connect", "jdbc:mysql://127.0.0.1:3306/test",
                "-username", "root",
                "-password", "system",
        };

        String[] expandArguments = OptionsFileUtil.expandArguments(args);

        Configuration pluginConf = SqoopTool.loadPlugins(new Configuration());
        com.cloudera.sqoop.tool.SqoopTool tool = (com.cloudera.sqoop.tool.SqoopTool) SqoopTool
                .getTool("list-tables");
        if (null == tool) {
            // BUG FIX: the original only printed the error and then passed the
            // null tool to new Sqoop(...), guaranteeing a NullPointerException.
            System.err.println("No such sqoop tool: list-tables See 'sqoop help'.");
            return 1;
        }
        Sqoop sqoop = new Sqoop(tool, pluginConf);
        return Sqoop.runSqoop(sqoop, expandArguments);
    }

    public static void main(String[] args) throws Exception {
        System.out.println(listTablesFromMysql());
        // Propagate the import's exit code so shell callers can detect failure
        // (the original discarded it).
        int status = importDataFromMysql();
        if (status != 0) {
            System.exit(status);
        }
    }
}

  • 使用中需要特别注意hadoop的版本需要与maven中hadoop配置的版本兼容。
  • 使用mysql开启binlog模式时,创建mysql函数需要执行
    set global log_bin_trust_function_creators=TRUE;
  • 3
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值