MapReduce编程——求Top3

实验数据

x.txt : 45 3 78 456 70 1 999
y.txt: 1123 7 66 67 123
z.txt: 798 0 35 29 6 250

代码

mapper类

V2 为每行中以空白分隔的各个整数(每个整数输出一条记录),K2 为 Null
从文档以Text形式读值然后转换为IntWritable写入

package mrTop3;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TopNMapper extends Mapper<Object, Text,NullWritable,IntWritable> {

	private IntWritable num = new IntWritable();
	public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
		StringTokenizer itr = new StringTokenizer(value.toString());
		while (itr.hasMoreTokens())
		{
			num.set(Integer.parseInt(itr.nextToken()));
			
			context.write(NullWritable.get(),num);
		}
	}
}

reducer类

k3 为Null,V3为三个数值字符串Text
reduce 方法:将 V2(各个整数值)写入一个 List,不输出
cleanup 方法:从 List中取出三个最大值, context.write 输出;

package mrTop3;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the Top-3 job: collects every integer it receives and, once all
 * input has been consumed, writes the largest values as one comma-separated
 * {@link Text} record under a {@link NullWritable} key.
 *
 * <p>At most {@link #TOP_N} values are written. If fewer values were received,
 * all of them are written; if none were received, nothing is written.
 */
public class TopNReducer extends Reducer<NullWritable,IntWritable, NullWritable,Text> {

	// Maximum number of largest values reported by cleanup().
	private static final int TOP_N = 3;

	// Every value seen by reduce(); ranked and emitted in cleanup().
	List<Integer> numlist =new ArrayList<Integer>();

	/**
	 * Stores all incoming values for later ranking; produces no output itself.
	 *
	 * @param key     the shared key; not used
	 * @param values  integers emitted by the mappers
	 * @param context task context; not written to here
	 */
	@Override
	public void reduce(NullWritable key, Iterable<IntWritable> values,
			Context context) throws IOException, InterruptedException {
		// Copy the primitive out of each IntWritable before storing it.
		for (IntWritable val : values) {
			numlist.add(val.get());
		}
	}

	/**
	 * Sorts the collected values in descending order and writes the first
	 * {@code min(TOP_N, count)} of them, joined by commas, as a single record.
	 *
	 * @param context task context used to write the result
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void cleanup(Context context)
			throws IOException, InterruptedException {
		if (numlist.isEmpty()) {
			// No input reached this reducer: there is nothing to rank or emit.
			return;
		}
		Collections.sort(numlist, Collections.reverseOrder());

		// Clamp to the number of values actually available to avoid indexing past the end.
		int limit = Math.min(TOP_N, numlist.size());
		StringBuilder joined = new StringBuilder();
		for (int i = 0; i < limit; i++) {
			if (i > 0) {
				joined.append(',');
			}
			joined.append(numlist.get(i));
		}
		// Output key is NullWritable; the value holds the top numbers separated by commas.
		context.write(NullWritable.get(), new Text(joined.toString()));
	}
}

main

package mrTop3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the Top-3 MapReduce job.
 *
 * <p>Usage: {@code TopNMain <input path> <output path>}
 */
public class TopNMain {
    /**
     * Builds the job from {@link TopNMapper} and {@link TopNReducer}, runs it
     * and exits with status 0 on success or 1 on failure. Exits with status 2
     * when the input and output paths are not both supplied.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: TopNMain <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        job.setJarByClass(TopNMain.class);
        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);

        // A single reduce task so that one reducer instance sees every value.
        job.setNumReduceTasks(1);

        job.setMapOutputKeyClass(NullWritable.class);   // key type emitted by the map phase
        job.setMapOutputValueClass(IntWritable.class);  // value type emitted by the map phase

        job.setOutputKeyClass(NullWritable.class);      // key type emitted by the reduce phase
        job.setOutputValueClass(Text.class);            // value type emitted by the reduce phase

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}

运行结果

在这里插入图片描述

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值