package com.zhen.mysqlToHDFS;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author FengZhen
 * Import MySQL data into HDFS.
 */
public class DBInputFormatApp extends Configured implements Tool {

    /**
     * JavaBean.
     * It must implement Hadoop's serialization interface Writable as well as DBWritable,
     * the serialization interface used when exchanging data with the database.
     * The official API describes the input format as:
     *   public class DBInputFormat<T extends DBWritable>
     *       extends InputFormat<LongWritable, T> implements Configurable
     * That is, the Mapper key is always LongWritable and cannot be changed; the value is
     * a custom JavaBean that implements the DBWritable interface.
     */
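    /*
     * For reference, the bean below is assumed to map onto a MySQL table shaped roughly
     * like the following (column names and types are inferred from the ResultSet
     * getInt/getString/getDouble calls, not taken from the source):
     *
     *   CREATE TABLE people (
     *       id     INT,
     *       name   VARCHAR(255),
     *       height DOUBLE
     *   );
     */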
    public static class BeanWritable implements Writable, DBWritable {

        private int id;
        private String name;
        private double height;

        // Deserialize one row from the JDBC ResultSet.
        public void readFields(ResultSet resultSet) throws SQLException {
            this.id = resultSet.getInt(1);
            this.name = resultSet.getString(2);
            this.height = resultSet.getDouble(3);
        }

        // Bind the bean's fields to a PreparedStatement (used when writing back to the database).
        public void write(PreparedStatement preparedStatement) throws SQLException {
            preparedStatement.setInt(1, id);
            preparedStatement.setString(2, name);
            preparedStatement.setDouble(3, height);
        }

        // Hadoop Writable deserialization.
        public void readFields(DataInput dataInput) throws IOException {
            this.id = dataInput.readInt();
            this.name = dataInput.readUTF();
            this.height = dataInput.readDouble();
        }

        // Hadoop Writable serialization.
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeInt(id);
            dataOutput.writeUTF(name);
            dataOutput.writeDouble(height);
        }

        @Override
        public String toString() {
            return id + "\t" + name + "\t" + height;
        }
    }

    /**
     * Map.
     * When the map output key is LongWritable and the value is Text, the reducer can be
     * omitted; the default (identity) reducer also emits LongWritable/Text pairs.
     */
    public static class DBInputMapper extends Mapper<LongWritable, BeanWritable, LongWritable, Text> {

        private LongWritable outputKey;
        private Text outputValue;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            this.outputKey = new LongWritable();
            this.outputValue = new Text();
        }

        @Override
        protected void map(LongWritable key, BeanWritable value, Context context)
                throws IOException, InterruptedException {
            outputKey.set(key.get());
            outputValue.set(value.toString());
            context.write(outputKey, outputValue);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration configuration = getConf();

        // Configure the JDBC connection used by this job.
        DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver",
                "jdbc:mysql://localhost:3306/hadoop", "root", "123qwe");

        Job job = Job.getInstance(configuration, DBInputFormatApp.class.getSimpleName());
        job.setJarByClass(DBInputFormatApp.class);
        job.setMapperClass(DBInputMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Configure the job's input format.
        job.setInputFormatClass(DBInputFormat.class);

        // Configure the query that feeds the job, the bean that receives each row,
        // and the count query used to compute input splits.
        DBInputFormat.setInput(job, BeanWritable.class,
                "select * from people", "select count(1) from people");

        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static int createJob(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.datanode.socket.write.timeout", "7200000");
        conf.set("mapreduce.input.fileinputformat.split.minsize", "268435456");
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "536870912");

        int status = 0;
        try {
            status = ToolRunner.run(conf, new DBInputFormatApp(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return status;
    }

    public static void main(String[] args) {
        // Note: the command-line arguments are overridden with a hard-coded HDFS output path.
        args = new String[] { "/user/hadoop/mapreduce/mysqlToHdfs/people" };
        int status = createJob(args);
        System.exit(status);
    }
}
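
/*
 * Minimal usage sketch (the jar name and driver-distribution approach are assumptions,
 * not from the source): build this class into a job jar, make the MySQL JDBC driver
 * available to both the client and the map tasks (for example by bundling it in the
 * job jar's lib/ directory), then launch:
 *
 *   hadoop jar mysqlToHDFS.jar com.zhen.mysqlToHDFS.DBInputFormatApp
 *
 * The job reads the `people` table from jdbc:mysql://localhost:3306/hadoop and writes
 * tab-separated records (id, name, height, keyed by record index) to the HDFS path
 * /user/hadoop/mapreduce/mysqlToHdfs/people.
 */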