项目场景:
在写MapReduce程序的时候,不时也会遇到将需要程序结果保存到Mysql这种情况
注意点:
hadoop自带将数据输出到Mysql的OutputFormat,即DBOutputFormat
使用DBOutputFormat时需要注意几个点:
1、需要实现一个实体类,这个实体类部分映射数据库中要查询的表的字段。
2、实体类需要实现Writable与DBWritable两个接口,DBWritable的实现类负责查询与写入,Writable的实现类负责序列化输出。
3、参数索引与列索引要一致。
4、需要在Mapper阶段或者Reduce阶段的输出类型中将key类型设置为该实体类。
5、数据库与数据表需要存在。
6、字段名要一致,且类型一致。
7、字段与输入数据顺序一致。
下面,使用一个简单的WordCount案例来简单演示一下如何使用DBOutputFormat。
Mapper类:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Tokenizing mapper for the word-count-to-MySQL job.
 *
 * <p>Splits every input line on tab characters and emits one
 * {@code (token, 1)} pair per field. The output key/value objects are
 * reused across records to avoid per-record allocation, which is the
 * usual Hadoop idiom.
 */
public class DB_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reusable output key; set to each token before writing.
    Text k = new Text();
    // Constant count of one emitted for every token.
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NOTE(review): input is assumed to be tab-separated — confirm
        // against the upstream cleaning step that produces these files.
        for (String token : value.toString().split("\t")) {
            k.set(token);
            context.write(k, v);
        }
    }
}
Reduce类
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Summing reducer: totals the counts for one word and emits a
 * {@link DB_Bean} key (consumed by {@code DBOutputFormat}) with a
 * {@code NullWritable} value.
 */
public class DB_Reduce extends Reducer<Text, IntWritable, DB_Bean, NullWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate the total occurrence count for this word.
        int total = 0;
        for (IntWritable count : values) {
            total += count.get();
        }

        // Package the (word, total) pair into the DB-mapped bean;
        // DBOutputFormat writes the key, so the value is NullWritable.
        DB_Bean record = new DB_Bean();
        record.setField(key.toString());
        record.setCount(total);
        context.write(record, NullWritable.get());
    }
}
自定义实体类
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
//实现 DBWritable,Writable接口并重写它们相应的方法
public class DB_Bean implements DBWritable, Writable {
private String field;
private int count;
public DB_Bean() {
}
public DB_Bean(String field, int count) {
this.field = field;
this.count = count;
}
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(field);
dataOutput.writeInt(count);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
field = dataInput.readUTF();
count = dataInput.readInt();
}
@Override
public void write(PreparedStatement statement) throws SQLException {
// 设置参数索引以及参数
statement.setString(1,this.getField());
statement.setInt(2,this.getCount());
}
@Override
public void readFields(ResultSet resultSet) throws SQLException {
// 列索引
this.field = resultSet.getString(1);
this.count = resultSet.getInt(2);
}
public String getField() {
return field;
}
public void setField(String field) {
this.field = field;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
}
驱动类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Job driver: reads tab-separated text and writes word counts to the
 * MySQL table {@code wc(field, count)} via {@code DBOutputFormat}.
 */
public class DB_Driver implements Tool {

    Configuration conf = null;

    /**
     * Configures and runs the word-count job.
     *
     * @param strings CLI args; {@code strings[0]} is the HDFS/local input path
     * @return 0 on job success, 1 on failure
     */
    @Override
    public int run(String[] strings) throws Exception {
        // JDBC connection settings: conf, driverClass, dbUrl, username, password.
        // NOTE(review): com.mysql.jdbc.Driver is the legacy (pre-8.x) driver
        // class; with Connector/J 8+ use com.mysql.cj.jdbc.Driver — confirm
        // which connector jar is on the classpath.
        DBConfiguration.configureDB(conf,
                "com.mysql.jdbc.Driver",
                "jdbc:mysql://localhost:3306/mydb",
                "root",
                "000000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(DB_Driver.class);
        job.setMapperClass(DB_Mapper.class);
        job.setReducerClass(DB_Reduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // BUG FIX: the job output key is the reducer's key type, DB_Bean —
        // the original code wrongly passed DB_Mapper.class here, which does
        // not match the reducer signature <Text, IntWritable, DB_Bean,
        // NullWritable> and breaks output serialization.
        job.setOutputKeyClass(DB_Bean.class);
        job.setOutputValueClass(NullWritable.class);

        // Route job output through DBOutputFormat instead of the filesystem.
        job.setOutputFormatClass(DBOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(strings[0]));

        // Target table and column names; order must match the parameter
        // indices used in DB_Bean.write(PreparedStatement).
        DBOutputFormat.setOutput(job, "wc", "field", "count");

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        conf = configuration;
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    public static void main(String[] args) throws Exception {
        // Use the hard-coded local test path only when no CLI argument is
        // given (the original unconditionally overwrote the CLI args).
        if (args == null || args.length == 0) {
            args = new String[]{"C:\\Users\\86173\\Desktop\\测试\\guo3_2清洗"};
        }
        // Propagate the tool's result as the process exit code.
        System.exit(ToolRunner.run(new Configuration(), new DB_Driver(), args));
    }
}