mapreduce mysql hdfs_使用MapReduce将mysql数据导入HDFS

package com.zhen.mysqlToHDFS;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.Writable;

import org.apache.hadoop.mapred.lib.db.DBWritable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;

import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**
 * Imports MySQL table data into HDFS using a map-only MapReduce job.
 *
 * @author FengZhen
 */

public classDBInputFormatApp extends Configured implements Tool {/**

* JavaBean

* 需要实现Hadoop序列化接口Writable以及与数据库交互时的序列化接口DBWritable

* 官方API中解释如下:

* public class DBInputFormat

* extends InputFormat implements Configurable

* 即Mapper的Key是LongWritable类型,不可改变;Value是继承自DBWritable接口的自定义JavaBean*/

public static classBeanWritable implements Writable, DBWritable {private intid;privateString name;private doubleheight;public voidreadFields(ResultSet resultSet) throws SQLException {this.id = resultSet.getInt(1);this.name = resultSet.getString(2);this.height = resultSet.getDouble(3);

}public voidwrite(PreparedStatement preparedStatement) throws SQLException {

preparedStatement.setInt(1, id);

preparedStatement.setString(2, name);

preparedStatement.setDouble(3, height);

}public voidreadFields(DataInput dataInput) throws IOException {this.id =dataInput.readInt();this.name =dataInput.readUTF();this.height =dataInput.readDouble();

}public voidwrite(DataOutput dataOutput) throws IOException {

dataOutput.writeInt(id);

dataOutput.writeUTF(name);

dataOutput.writeDouble(height);

}

@OverridepublicString toString() {return id + "\t" + name + "\t" +height;

}

}/**

* Map

* 当Map的输出key为LongWritable,value为Text时,reduce可以省略不写,默认reduce也是输出LongWritable:Text

**/

public static class DBInputMapper extends Mapper{privateLongWritable outputKey;privateText outputValue;

@Overrideprotected void setup(Mapper.Context context)

throws IOException, InterruptedException {this.outputKey = newLongWritable();this.outputValue = newText();

}

@Overrideprotected voidmap(LongWritable key, BeanWritable value,

Mapper.Context context)

throws IOException, InterruptedException {

outputKey.set(key.get());;

outputValue.set(value.toString());

context.write(outputKey, outputValue);

}

}public intrun(String[] arg0) throws Exception {

Configuration configuration=getConf();//配置当前作业需要使用的JDBC配置

DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver", "jdbc:mysql://localhost:3306/hadoop","root", "123qwe");

Job job= Job.getInstance(configuration, DBInputFormatApp.class.getSimpleName());

job.setJarByClass(DBInputFormatApp.class);

job.setMapperClass(DBInputMapper.class);

job.setMapOutputKeyClass(LongWritable.class);

job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(LongWritable.class);

job.setOutputValueClass(Text.class);//配置作业的输入数据格式

job.setInputFormatClass(DBInputFormat.class);//配置当前作业需要查询的sql语句及接收sql语句的bean

DBInputFormat.setInput(

job,

BeanWritable.class,"select * from people","select count(1) from people");

FileOutputFormat.setOutputPath(job,new Path(arg0[0]));return job.waitForCompletion(true) ? 0 : 1;

}public static intcreateJob(String[] args) {

Configuration conf= newConfiguration();

conf.set("dfs.datanode.socket.write.timeout", "7200000");

conf.set("mapreduce.input.fileinputformat.split.minsize", "268435456");

conf.set("mapreduce.input.fileinputformat.split.maxsize", "536870912");int status = 0;try{

status= ToolRunner.run(conf,newDBInputFormatApp(), args);

}catch(Exception e) {

e.printStackTrace();

}returnstatus;

}public static voidmain(String[] args) {

args= new String[] { "/user/hadoop/mapreduce/mysqlToHdfs/people"};int status =createJob(args);

System.exit(status);

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值