
Importing from MySQL into Hadoop with the Sqoop API

Goal: import existing MySQL data into HDFS by driving Sqoop programmatically from Java, avoiding the hassle of installing a local Sqoop environment.

Data preparation

CREATE DATABASE test;
USE test;

CREATE TABLE `vote_record` (
  `id`          INT(11)     NOT NULL AUTO_INCREMENT,
  `user_id`     VARCHAR(20) NOT NULL,
  `vote_id`     INT(11)     NOT NULL,
  `group_id`    INT(11)     NOT NULL,
  `create_time` DATETIME    NOT NULL,
  PRIMARY KEY (`id`),
  KEY `index_user_id` (`user_id`) USING HASH
)
  ENGINE = INNODB
  AUTO_INCREMENT = 1
  DEFAULT CHARSET = utf8;

DELIMITER $$
CREATE FUNCTION `rand_string`(n INT)
  RETURNS VARCHAR(255) CHARSET latin1
  BEGIN
    DECLARE chars_str VARCHAR(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    DECLARE return_str VARCHAR(255) DEFAULT '';
    DECLARE i INT DEFAULT 0;
    WHILE i < n DO
      SET return_str = concat(return_str, substring(chars_str, FLOOR(1 + RAND() * 62), 1));
      SET i = i + 1;
    END WHILE;
    RETURN return_str;
  END$$

CREATE PROCEDURE `add_vote_record`(IN n INT)
  BEGIN
    DECLARE i INT DEFAULT 1;
    WHILE (i <= n) DO
      INSERT INTO vote_record (user_id, vote_id, group_id, create_time)
      VALUES (rand_string(20), FLOOR(RAND() * 1000), FLOOR(RAND() * 100), now());
      SET i = i + 1;
    END WHILE;
  END$$
DELIMITER ;

CALL add_vote_record(1000000);
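
Generating one million rows this way can take a few minutes. If you would rather drive the data generation from Java and sanity-check the row count, a minimal JDBC sketch is shown below. It assumes the same connection settings used by the Sqoop code later in this post (127.0.0.1:3306, user root, password system), which are illustrative values rather than requirements; the class name is likewise just for illustration.

import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class GenerateTestData {
    public static void main(String[] args) throws Exception {
        // Assumed connection settings; match them to your own MySQL instance.
        String url = "jdbc:mysql://127.0.0.1:3306/test";
        try (Connection conn = DriverManager.getConnection(url, "root", "system")) {
            // Invoke the stored procedure defined above to insert one million rows.
            try (CallableStatement cs = conn.prepareCall("{call add_vote_record(?)}")) {
                cs.setInt(1, 1000000);
                cs.execute();
            }
            // Sanity check: confirm the rows actually landed.
            try (Statement st = conn.createStatement();
                 ResultSet rs = st.executeQuery("SELECT COUNT(*) FROM vote_record")) {
                rs.next();
                System.out.println("rows in vote_record: " + rs.getLong(1));
            }
        }
    }
}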

Maven dependencies

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
        </dependency>
        <dependency>
            <groupId>org.apache.sqoop</groupId>
            <artifactId>sqoop</artifactId>
            <version>1.4.7</version>
            <classifier>hadoop260</classifier>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.8.2</version>
        </dependency>
    </dependencies>

The code

package cn.hackcoder;

import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.Sqoop;
import org.apache.sqoop.tool.SqoopTool;
import org.apache.sqoop.util.OptionsFileUtil;

/**
 * Created by linzhichao on 2018/5/15.
 */
public class SqoopUtils {
    private static int importDataFromMysql() throws Exception {
        // Equivalent to the CLI invocation:
        // sqoop import --connect ... --table vote_record -m 1 --target-dir ...
        String[] args = new String[]{
                "--connect", "jdbc:mysql://127.0.0.1:3306/test",
                "--driver", "com.mysql.jdbc.Driver",
                "--username", "root",
                "--password", "system",
                "--table", "vote_record",
                "-m", "1",  // a single map task, so no --split-by is needed
                "--target-dir", "/user/root/import_vote_record"
        };

        String[] expandArguments = OptionsFileUtil.expandArguments(args);

        SqoopTool tool = SqoopTool.getTool("import");

        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://127.0.0.1:9000");//设置HDFS服务地址
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
        Configuration loadPlugins = SqoopTool.loadPlugins(conf);

        // On Sqoop 1.4.x the concrete tools still extend the legacy
        // com.cloudera.sqoop.tool.SqoopTool shim, so this cast is safe.
        Sqoop sqoop = new Sqoop((com.cloudera.sqoop.tool.SqoopTool) tool, loadPlugins);
        return Sqoop.runSqoop(sqoop, expandArguments);
    }

    private static int listTablesFromMysql() throws Exception {
        String[] args = new String[]{
                "--connect", "jdbc:mysql://127.0.0.1:3306/test",
                "-username", "root",
                "-password", "system",
        };

        String[] expandArguments = OptionsFileUtil.expandArguments(args);

        Configuration pluginConf = SqoopTool.loadPlugins(new Configuration());
        com.cloudera.sqoop.tool.SqoopTool tool = (com.cloudera.sqoop.tool.SqoopTool) SqoopTool
                .getTool("list-tables");
        if (null == tool) {
            System.err.println("No such sqoop tool: list-tables. See 'sqoop help'.");
            return 1;
        }
        Sqoop sqoop = new Sqoop(tool, pluginConf);
        return Sqoop.runSqoop(sqoop, expandArguments);
    }

    public static void main(String[] args) throws Exception {
        System.out.println(listTablesFromMysql());
        importDataFromMysql();
    }
}
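
Once importDataFromMysql() returns 0, the imported data can be checked without the hdfs CLI. Below is a minimal verification sketch using the Hadoop FileSystem API; it assumes the same NameNode address and target directory as above, and that with -m 1 Sqoop wrote its output to a single part-m-00000 file (the default naming). The class name is illustrative.

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class VerifyImport {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000"); // same NameNode as the import
        try (FileSystem fs = FileSystem.get(conf)) {
            // With -m 1, Sqoop writes a single part file under the target directory.
            Path part = new Path("/user/root/import_vote_record/part-m-00000");
            try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(fs.open(part)))) {
                // Print the first few imported rows (fields are comma-separated by default).
                String line;
                for (int i = 0; i < 5 && (line = reader.readLine()) != null; i++) {
                    System.out.println(line);
                }
            }
        }
    }
}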

  • Note that the Hadoop version running on your cluster must be compatible with the Hadoop version declared in the Maven dependencies.
  • If MySQL is running with the binlog enabled, creating the rand_string function first requires executing
    set global log_bin_trust_function_creators=TRUE;
Tags: sqoop hdfs mysql