package com.zhen.mysqlToHDFS;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author FengZhen
 * Import MySQL data into HDFS.
 */
public class DBInputFormatApp extends Configured implements Tool {

    /**
     * JavaBean.
     * It must implement Hadoop's serialization interface Writable as well as DBWritable,
     * the serialization interface used when exchanging data with the database.
     * The official API describes the input format as:
     *   public class DBInputFormat<T extends DBWritable>
     *       extends InputFormat<LongWritable, T> implements Configurable
     * That is, the Mapper key is always LongWritable and cannot be changed; the value is
     * a custom JavaBean that implements the DBWritable interface.
     */
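    /*
     * For reference, the bean below is assumed to map onto a MySQL table shaped roughly
     * like the following (column names and types are inferred from the ResultSet
     * getInt/getString/getDouble calls, not taken from the source):
     *
     *   CREATE TABLE people (
     *       id     INT,
     *       name   VARCHAR(255),
     *       height DOUBLE
     *   );
     */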
    public static class BeanWritable implements Writable, DBWritable {

        private int id;
        private String name;
        private double height;

        // Deserialize one row from the JDBC ResultSet.
        public void readFields(ResultSet resultSet) throws SQLException {
            this.id = resultSet.getInt(1);
            this.name = resultSet.getString(2);
            this.height = resultSet.getDouble(3);
        }

        // Bind the bean's fields to a PreparedStatement (used when writing back to the database).
        public void write(PreparedStatement preparedStatement) throws SQLException {
            preparedStatement.setInt(1, id);
            preparedStatement.setString(2, name);
            preparedStatement.setDouble(3, height);
        }

        // Hadoop Writable deserialization.
        public void readFields(DataInput dataInput) throws IOException {
            this.id = dataInput.readInt();
            this.name = dataInput.readUTF();
            this.height = dataInput.readDouble();
        }

        // Hadoop Writable serialization.
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeInt(id);
            dataOutput.writeUTF(name);
            dataOutput.writeDouble(height);
        }

        @Override
        public String toString() {
            return id + "\t" + name + "\t" + height;
        }
    }

    /**
     * Map.
     * When the map output key is LongWritable and the value is Text, the reducer can be
     * omitted; the default (identity) reducer also emits LongWritable/Text pairs.
     */
    public static class DBInputMapper extends Mapper<LongWritable, BeanWritable, LongWritable, Text> {

        private LongWritable outputKey;
        private Text outputValue;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            this.outputKey = new LongWritable();
            this.outputValue = new Text();
        }

        @Override
        protected void map(LongWritable key, BeanWritable value, Context context)
                throws IOException, InterruptedException {
            outputKey.set(key.get());
            outputValue.set(value.toString());
            context.write(outputKey, outputValue);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration configuration = getConf();

        // Configure the JDBC connection used by this job.
        DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver",
                "jdbc:mysql://localhost:3306/hadoop", "root", "123qwe");

        Job job = Job.getInstance(configuration, DBInputFormatApp.class.getSimpleName());
        job.setJarByClass(DBInputFormatApp.class);
        job.setMapperClass(DBInputMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Configure the job's input format.
        job.setInputFormatClass(DBInputFormat.class);

        // Configure the query that feeds the job, the bean that receives each row,
        // and the count query used to compute input splits.
        DBInputFormat.setInput(job, BeanWritable.class,
                "select * from people", "select count(1) from people");

        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static int createJob(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.datanode.socket.write.timeout", "7200000");
        conf.set("mapreduce.input.fileinputformat.split.minsize", "268435456");
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "536870912");

        int status = 0;
        try {
            status = ToolRunner.run(conf, new DBInputFormatApp(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return status;
    }

    public static void main(String[] args) {
        // Note: the command-line arguments are overridden with a hard-coded HDFS output path.
        args = new String[] { "/user/hadoop/mapreduce/mysqlToHdfs/people" };
        int status = createJob(args);
        System.exit(status);
    }
}
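
/*
 * Minimal usage sketch (the jar name and driver-distribution approach are assumptions,
 * not from the source): build this class into a job jar, make the MySQL JDBC driver
 * available to both the client and the map tasks (for example by bundling it in the
 * job jar's lib/ directory), then launch:
 *
 *   hadoop jar mysqlToHDFS.jar com.zhen.mysqlToHDFS.DBInputFormatApp
 *
 * The job reads the `people` table from jdbc:mysql://localhost:3306/hadoop and writes
 * tab-separated records (id, name, height, keyed by record index) to the HDFS path
 * /user/hadoop/mapreduce/mysqlToHdfs/people.
 */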