Experiment Data
x.txt: 45 3 78 456 70 1 999
y.txt: 1123 7 66 67 123
z.txt: 798 0 35 29 6 250
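With these three files as input, the three largest values overall are 1123 (from y.txt), 999 (from x.txt), and 798 (from z.txt), so the job should emit the single line 1123,999,798.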
Code
Mapper class
K2 is NullWritable and V2 is each numeric value on a line.
The mapper reads values from the file as Text, parses each token to an int, and writes it out as an IntWritable.
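For example, the input line "45 3 78" yields three output records: (null, 45), (null, 3), and (null, 78).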
package mrTop3;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TopNMapper extends Mapper<Object, Text, NullWritable, IntWritable> {

    private final IntWritable num = new IntWritable();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line on whitespace and emit each token as an integer.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            num.set(Integer.parseInt(itr.nextToken()));
            // Every value shares the NullWritable key, so they all land in one reduce group.
            context.write(NullWritable.get(), num);
        }
    }
}
Reducer class
K3 is NullWritable; V3 is a Text string holding the three values.
reduce method: adds every value (V2) to a List and emits nothing.
cleanup method: takes the three largest values from the List and emits them via context.write.
package mrTop3;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class TopNReducer extends Reducer<NullWritable, IntWritable, NullWritable, Text> {

    // Collects every value seen by the single reducer.
    private final List<Integer> numlist = new ArrayList<Integer>();

    @Override
    public void reduce(NullWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Buffer all values in the list; nothing is emitted here.
        for (IntWritable val : values) {
            numlist.add(val.get());
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Sort ascending with Collections, then reverse for descending order.
        Collections.sort(numlist);
        Collections.reverse(numlist);

        // Guard against inputs with fewer than three values.
        int n = Math.min(3, numlist.size());
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < n; i++) {
            if (i > 0) {
                sb.append(',');
            }
            sb.append(numlist.get(i));
        }
        // K3 is NullWritable; V3 is the top three values joined by commas.
        context.write(NullWritable.get(), new Text(sb.toString()));
    }
}
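The reducer above buffers every value in memory, which is fine for this experiment but does not scale to large inputs. A common alternative is to keep only a bounded min-heap of size N while streaming the values. The following is a minimal sketch of that idea; it is not part of the original code, and the class name TopNHeapReducer and constant N are illustrative.

package mrTop3;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.PriorityQueue;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical variant: holds at most N values in memory instead of all of them.
public class TopNHeapReducer extends Reducer<NullWritable, IntWritable, NullWritable, Text> {

    private static final int N = 3;
    // Min-heap: the smallest of the current top N sits at the head.
    private final PriorityQueue<Integer> heap = new PriorityQueue<Integer>(N);

    @Override
    public void reduce(NullWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        for (IntWritable val : values) {
            heap.offer(val.get());
            if (heap.size() > N) {
                heap.poll(); // evict the smallest, keeping only the top N
            }
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Drain the heap and sort descending for output.
        List<Integer> top = new ArrayList<Integer>(heap);
        Collections.sort(top);
        Collections.reverse(top);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < top.size(); i++) {
            if (i > 0) {
                sb.append(',');
            }
            sb.append(top.get(i));
        }
        context.write(NullWritable.get(), new Text(sb.toString()));
    }
}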
Main class
package mrTop3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TopNMain {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(TopNMain.class);
        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);
        // A single reducer sees every value, so the top 3 is global.
        job.setNumReduceTasks(1);
        job.setMapOutputKeyClass(NullWritable.class);   // map output key
        job.setMapOutputValueClass(IntWritable.class);  // map output value
        job.setOutputKeyClass(NullWritable.class);      // reduce output key
        job.setOutputValueClass(Text.class);            // reduce output value
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
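To run the job, package the three classes into a jar, upload x.txt, y.txt, and z.txt to an HDFS input directory, and submit it with hadoop jar (the jar name and paths below are placeholders):

hadoop jar mrTop3.jar mrTop3.TopNMain /input /output

Since there is a single reducer, the result lands in /output/part-r-00000 as the one line 1123,999,798.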