import org.apache.hadoop.fs.Path;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class MaxValueMR {
// Mapper类
public static class MaxValueMapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
private LongWritable max = new LongWritable();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] values = line.split(" ");
long maxValue = Long.MIN_VALUE;
// 找到每行的最大值
for (String val : values) {
long num = Long.parseLong(val);
if (num > maxValue) {
maxValue = num;
}
}
max.set(maxValue);
context.write(new LongWritable(1), max);
}
}
// Reducer类
public static class MinValueReducer extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {
private LongWritable result = new LongWritable(Long.MAX_VALUE);
public void reduce(LongWritable key, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
// 找到所有最大值中的最小值
for (LongWritable val : values) {
long max = val.get();
if (max < result.get()) {
result.set(max);
}
}
context.write(new LongWritable(1), result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "MaxValueMR");
job.setJarByClass(MaxValueMR.class);
job.setMapperClass(MaxValueMapper.class);
job.setReducerClass(MinValueReducer.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(LongWritable.class);
job.setNumReduceTasks(1); // 只使用一个Reducer任务
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
实验3
最新推荐文章于 2024-01-21 10:30:00 发布
该代码示例展示了如何使用 Hadoop 的 MapReduce 框架求出输入数据中各行最大值里的最小值。Mapper 类解析每一行数据,找出该行的最大值,并以固定的键输出给 Reducer;Reducer 再从所有行最大值中找出最小的一个。最终结果输出到指定路径。
摘要由CSDN通过智能技术生成