一、实验目的及要求
1、熟悉mapreduce的combiner并行编程设计
2、培养独立设计并行程序的能力
二、实验原理与内容
内容:
假设一个年级有两个班级,数据分别在class1.csv和class2.csv中,求该年级的数学成绩平均值。数据第一列为学号,第二列为数学成绩。要求:必须使用Combiner类,且最后输出一行数据,该行仅有一个平均值。
三、实验软硬件环境
Hadoop集群
四、实验过程(实验步骤、记录、数据、分析)
注:给出所有代码和最终结果截图,排版工整美观
代码如下:
MyDriver.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the grade-average job: wires together MyMapper, MyCombiner
 * and MyReducer, then submits the job and waits for completion.
 *
 * Map phase emits (IntWritable 1, MyWritable{count, avg}) so that every
 * record lands in one reduce group; the combiner pre-aggregates per map
 * task; the reducer writes a single line holding only the average.
 */
public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setCombinerClass(MyCombiner.class);
        job.setReducerClass(MyReducer.class);
        // Map output value type must match what MyMapper actually emits.
        // It was wrongly set to IntWritable, which would make the
        // combiner/reducer fail when deserializing MyWritable values.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // Input and output are DIRECTORIES; the output directory must not
        // already exist or the job fails at submission.
        FileInputFormat.setInputPaths(job, new Path("hdfs://0.0.0.0:9000/input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://0.0.0.0:9000/output/avg_out"));
        if (!job.waitForCompletion(true))
            return;
    }
}
MyMapper.java
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MyMapper extends Mapper<LongWritable,Text, IntWritable, MyWritable> {
private MyWritable my = new MyWritable();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
my.setCount(1);
my.setAvg(Integer.parseInt(value.toString().split(“,”)[1]);
context.write(new IntWritable(1), my);
}
}
MyCombiner.java
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MyCombiner extends Reducer<IntWritable,MyWritable , IntWritable, MyWritable> {
private MyWritable result= new MyWritable();
public void reduce(IntWritable key, Iterable<MyWritable> values, Context context)
throws IOException, InterruptedException {
int count=0;
int sum=0;
// process values
for (MyWritable val : values) {
sum = sum+val.getCount()*val.getAvg();
count+=val.getCount();
}
result.setCount(count);
result.setAvg(sum/count);
context.write(key, result);
}
}
MyReducer.java
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MyReducer extends Reducer<IntWritable,MyWritable , NullWritable, IntWritable> {
private MyWritable result= new MyWritable();
public void reduce(IntWritable key, Iterable<MyWritable> values, Context context)
throws IOException, InterruptedException {
int count=0;
int sum=0;
// process values
for (MyWritable val : values) {
sum = sum+val.getCount()*val.getAvg();
count+=val.getCount();
}
result.setCount(count);
result.setAvg(sum/count);
context.write(NullWritable.get(), new IntWritable(sum/count));
}
}
MyWritable.java
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

/**
 * Hadoop value type carrying a partial aggregate: how many records it
 * represents ({@code count}) and their integer average ({@code avg}).
 * Serialized as two consecutive 4-byte ints, count first.
 */
public class MyWritable implements Writable {
    private int count = 0;
    private int avg = 0;

    /** Writes count then avg — must mirror the read order in readFields. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(count);
        out.writeInt(avg);
    }

    /** Restores both fields in exactly the order write() emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        count = in.readInt();
        avg = in.readInt();
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public int getAvg() {
        return avg;
    }

    public void setAvg(int avg) {
        this.avg = avg;
    }

    /** Text form used by TextOutputFormat: "count,avg". */
    @Override
    public String toString() {
        return count + "," + avg;
    }
}
运行结果:
Avg设为int时:
Avg设为float时: