Big Data Study Assignment - Hadoop

Problem

There are three files, and each line in every file contains a single number. Write a program that sorts the numbers from all three files as a whole in descending order. Each line of the result file contains two numbers: the first is the rank and the second is the original value.
The expected result is as follows:
1 2
2 6
3 15
4 22
5 26
6 32
7 32
8 54
9 92

Solution approach

1 Since the data must be sorted globally, all of it has to end up in a single reduce task (the driver relies on Hadoop's default of one reduce task; see the snippet right after this list for making that explicit)
2 Sorting is done by a custom key class that overrides the comparator for descending order; the map side takes <LongWritable, Text> as input and emits <SortBean, NullWritable>
3 Besides sorting, each value also needs a rank; since the single reduce task processes the records one by one, a static counter can be defined and incremented along the way, and the reduce side emits <LongWritable, SortBean>
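
The driver below relies on Hadoop's default of a single reduce task. To make the requirement from step 1 explicit rather than implicit, one extra line in the driver would be enough (a sketch, not part of the original code):

        // Optional: pin the reduce-task count so the global ordering cannot be broken by a configuration override
        job.setNumReduceTasks(1);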

Custom key class

package com.lagou.hadoop.sort;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class SortBean implements WritableComparable<SortBean> {
    private Long value;

    public Long getValue() {
        return value;
    }

    public void setValue(Long value) {
        this.value = value;
    }

    // Descending order: return a negative value when this value is larger, so bigger numbers come first in the shuffle sort
    @Override
    public int compareTo(SortBean o) {
        long thisValue = this.value;
        long thatValue = o.value;
        return thisValue > thatValue ? -1 : (thisValue == thatValue ? 0 : 1);
    }

    // Serialization: called by the framework when the key is written during the shuffle
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(value);
    }

    // Deserialization: fields must be read in the same order they were written
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.value = dataInput.readLong();
    }

    // toString() determines how the bean appears as the output value in the result file
    @Override
    public String toString() {
        return Long.toString(this.value);
    }
}
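
As a quick local sanity check of the descending compareTo outside of Hadoop, a throwaway class like the following can be used (hypothetical, not part of the assignment; SortBeanCheck is a made-up name and the numbers are taken from the expected result above):

package com.lagou.hadoop.sort;

import java.util.Arrays;

public class SortBeanCheck {
    public static void main(String[] args) {
        long[] input = {2, 6, 15, 22, 26, 32, 32, 54, 92};
        SortBean[] beans = new SortBean[input.length];
        for (int i = 0; i < input.length; i++) {
            beans[i] = new SortBean();
            beans[i].setValue(input[i]);
        }
        // Arrays.sort uses SortBean.compareTo, so the printed order should be descending:
        // [92, 54, 32, 32, 26, 22, 15, 6, 2]
        Arrays.sort(beans);
        System.out.println(Arrays.toString(beans));
    }
}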


mapper

package com.lagou.hadoop.sort;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class SortMapper extends Mapper<LongWritable, Text,SortBean, NullWritable> {
    // A single reusable key object: the framework serializes it on each write, so it is safe to reuse
    private SortBean bean = new SortBean();
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line holds a single number; parse it and emit it as the sort key
        bean.setValue(Long.valueOf(value.toString()));
        context.write(bean,NullWritable.get());
    }
}
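
If the input files may contain blank or whitespace-padded lines, Long.valueOf will throw a NumberFormatException and fail the map task. A defensive variant of the mapper could skip such lines (a sketch under that assumption; SafeSortMapper is a made-up class name):

package com.lagou.hadoop.sort;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class SafeSortMapper extends Mapper<LongWritable, Text, SortBean, NullWritable> {
    private SortBean bean = new SortBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Trim whitespace and skip empty lines instead of failing the whole task
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }
        bean.setValue(Long.valueOf(line));
        context.write(bean, NullWritable.get());
    }
}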

reducer

package com.lagou.hadoop.sort;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Objects;

public class SortReducer extends Reducer<SortBean, NullWritable,LongWritable,SortBean> {
    // Running rank counter; it spans all keys because there is only one reduce task
    private static LongWritable sort = new LongWritable(0);
    // The previously seen value, used to give equal values the same rank
    private static Long orgValue = null;
    @Override
    protected void reduce(SortBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        for (NullWritable value : values) {
            // Only advance the rank when the value changes; equal values share the same rank
            if(!Objects.equals(key.getValue(),orgValue)){
                sort.set(sort.get()+1);
                orgValue=key.getValue();
            }
            context.write(sort,key);
        }
    }
}
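
A note on the design: the static rank counter and the "same value, same rank" comparison only work because the whole data set passes through a single reduce task, exactly as required in step 1 of the approach. The effect of the tie handling is visible in the run result below, where the two 32s both get rank 8 and the next value gets rank 9.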

driver

package com.lagou.hadoop.sort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class SortDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Create the Configuration
        Configuration configuration = new Configuration();
        // Create the Job
        Job job = Job.getInstance(configuration, "sort_driver");
        // Set the jar, Mapper, and Reducer classes
        job.setJarByClass(SortDriver.class);
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);
        // Map-phase output key/value types
        job.setMapOutputKeyClass(SortBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Reduce-phase (final) output key/value types
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SortBean.class);
        // The default is a single reduce task (which the global sort relies on), so it is not set explicitly here

        // Input and output paths
        FileInputFormat.setInputPaths(job, new Path("C:/lagou/test/homework/input"));
        FileOutputFormat.setOutputPath(job, new Path("C:/lagou/test/homework/output"));
        // Submit the job and wait for it to finish
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
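
The input and output paths are hard-coded to a local Windows directory, which is fine for testing but awkward on a cluster. A common alternative is to implement Hadoop's Tool interface and take the paths from the command line, e.g. hadoop jar <your-jar> com.lagou.hadoop.sort.SortTool <input> <output>. The following is only a sketch (SortTool is a made-up class name):

package com.lagou.hadoop.sort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class SortTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "sort_driver");
        job.setJarByClass(SortTool.class);
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);
        job.setMapOutputKeyClass(SortBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SortBean.class);
        job.setNumReduceTasks(1); // the global sort depends on a single reduce task
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options (-D, -files, ...) before calling run()
        System.exit(ToolRunner.run(new Configuration(), new SortTool(), args));
    }
}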

Run result (the single reducer's output file, typically part-r-00000)

1 65223
2 5956
3 756
4 654
5 650
6 92
7 54
8 32
8 32
9 26
10 22
11 15
12 6
13 2
