hadoop学习整理——mapreduce数据分析案例(3)--topN

数据样例(字段之间以英文逗号分隔):
课程,姓名,xx,xx,xx,xx
其中 xx 为每次考试的分数,考试次数不固定。

computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75

需求:
1.求出每门课程中平均分第二高的学生的信息:课程、姓名和平均分
2.自定义序列化类(javabean)、分组比较器(GroupingComparator)、map和reduce

1.自定义javabean类

/**
 * Composite MapReduce key describing one student's result in one course.
 *
 * <p>Sort order ({@link #compareTo}): course ascending, then average score
 * descending, so within one course the best average comes first.
 *
 * <p>{@link #hashCode()} is derived from the course only. The job in this file
 * does not configure a partitioner, so the default hash-based partitioner is
 * used; hashing on the course keeps every record of a course on the same
 * reducer even when more than one reduce task is configured.
 */
public class Scorebean implements WritableComparable<Scorebean> {

    private String course;
    private String name;
    private double avgscore; // average score

    /** No-arg constructor so instances can be created before {@link #readFields} fills them. */
    public Scorebean() {
    }

    /**
     * Sets all three fields at once.
     *
     * @param course   course name
     * @param name     student name
     * @param avgscore average score of the student in the course
     */
    public void set(String course, String name, double avgscore) {
        this.course = course;
        this.name = name;
        this.avgscore = avgscore;
    }

    public String getCourse() {
        return course;
    }

    public void setCourse(String course) {
        this.course = course;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getAvgscore() {
        return avgscore;
    }

    public void setAvgscore(double avgscore) {
        this.avgscore = avgscore;
    }

    /**
     * Serializes the fields in the order course, name, average score.
     * {@link #readFields} must read them back in exactly this order.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(course);
        out.writeUTF(name);
        out.writeDouble(avgscore);
    }

    /** Restores the fields in the same order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.course = in.readUTF();
        this.name = in.readUTF();
        this.avgscore = in.readDouble();
    }

    /**
     * Sort rule: course ascending, then average score descending.
     *
     * <p>Returns 0 for two records with the same course and average even if
     * the names differ, so this ordering is coarser than {@link #equals}.
     *
     * @param cs the bean to compare against
     * @return negative, zero or positive per the {@link Comparable} contract
     */
    @Override
    public int compareTo(Scorebean cs) {
        // Compare the course first.
        int byCourse = course.compareTo(cs.getCourse());
        if (byCourse != 0) {
            return byCourse;
        }
        // Same course: arguments are swapped to obtain descending order by average.
        return Double.compare(cs.getAvgscore(), avgscore);
    }

    /**
     * Hash of the course only, so that all records of one course get the same
     * hash value and therefore the same hash-based partition.
     */
    @Override
    public int hashCode() {
        return course == null ? 0 : course.hashCode();
    }

    /** Two beans are equal when course, name and average score all match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Scorebean)) {
            return false;
        }
        Scorebean other = (Scorebean) obj;
        boolean sameCourse = course == null ? other.course == null : course.equals(other.course);
        boolean sameName = name == null ? other.name == null : name.equals(other.name);
        return sameCourse && sameName && Double.compare(avgscore, other.avgscore) == 0;
    }

    /** Tab-separated output line: course, name, average score. */
    @Override
    public String toString() {
        return course + "\t" + name + "\t" + avgscore;
    }
}

2.自定义分组类

/**
 * Grouping comparator that treats two {@link Scorebean} keys as belonging to
 * the same reduce group whenever their course is the same, regardless of
 * student name or average score.
 */
public class ScoreGroupingComparator extends WritableComparator {

    /** Registers {@link Scorebean} as the key class; {@code true} is passed as the second superclass argument. */
    protected ScoreGroupingComparator() {
        super(Scorebean.class, true);
    }

    /**
     * Compares two keys by course only.
     *
     * @param a first key, expected to be a {@link Scorebean}
     * @param b second key, expected to be a {@link Scorebean}
     * @return 0 when both keys have the same course (same group), otherwise
     *         the non-zero result of comparing the two course strings
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        String leftCourse = ((Scorebean) a).getCourse();
        String rightCourse = ((Scorebean) b).getCourse();

        // A result of 0 means "same group"; any other value starts a new group.
        return leftCourse.compareTo(rightCourse);
    }
}

3.Mapper类

/**
 * Mapper that turns one input line {@code course,name,score1,score2,...} into
 * a {@link Scorebean} key carrying the course, the student name and the
 * student's average score rounded to one decimal place.
 *
 * <p>Input pair: {@code LongWritable} key (not used here) and {@code Text}
 * value holding the line content.
 * Output pair: {@code Scorebean} key and {@code NullWritable} value. All three
 * pieces of information travel in the key, so the value is only a placeholder.
 * An alternative design would keep course and name in the key and emit the
 * average as a {@code DoubleWritable} value.
 */
public class Mapper_CS extends Mapper<LongWritable, Text, Scorebean, NullWritable> {

    /** Index of the first score column; columns 0 and 1 hold course and name. */
    private static final int FIRST_SCORE_INDEX = 2;

    /** Output key instance, reused for every record and overwritten per line. */
    private final Scorebean keyOut = new Scorebean();

    /**
     * Parses one line and emits its course/name/average key.
     *
     * <p>Lines with no score column (blank lines, or only course and name) are
     * skipped and counted under the {@code Mapper_CS / MALFORMED_LINES}
     * counter, because no meaningful average can be computed for them.
     *
     * @throws NumberFormatException if a score column is not an integer
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        String[] splits = value.toString().split(",");

        // Without at least one score the average would be 0/0; skip such lines.
        if (splits.length <= FIRST_SCORE_INDEX) {
            context.getCounter("Mapper_CS", "MALFORMED_LINES").increment(1);
            return;
        }

        int sum = 0;
        for (int i = FIRST_SCORE_INDEX; i < splits.length; i++) {
            // parseInt avoids boxing; trim tolerates stray spaces around a score.
            sum += Integer.parseInt(splits[i].trim());
        }

        // Multiply by 10 before rounding and divide afterwards to keep one decimal place.
        int examCount = splits.length - FIRST_SCORE_INDEX;
        double avgScore = Math.round(sum * 10.0 / examCount) / 10D;

        keyOut.set(splits[0], splits[1], avgScore);

        context.write(keyOut, NullWritable.get());
    }
}

4.Reducer类

/**
 * Reducer that emits, for each reduce group, the key that is current when the
 * second value of the group is reached. Groups with fewer than two values
 * produce no output.
 */
public class Reducer_CS extends Reducer<Scorebean, NullWritable, Scorebean, NullWritable> {

    /**
     * Writes the second record of the group and stops iterating.
     *
     * <p>NOTE(review): this relies on {@code key} reflecting the record that
     * belongs to the value currently being iterated, and on the group being
     * ordered by average score descending; confirm against the job setup.
     * To emit the top two records instead, write on every iteration and stop
     * after the second one.
     */
    @Override
    protected void reduce(Scorebean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {

        int rank = 0;
        for (NullWritable placeholder : values) {
            rank += 1;
            if (rank != 2) {
                // Still on the first record; move on to the next one.
                continue;
            }
            // Second record of the group reached: emit it and finish.
            context.write(key, placeholder);
            break;
        }
    }
}

5.Driver类实现

/**
 * Driver that configures and submits the "second-highest average per course"
 * job: {@link Mapper_CS} as mapper, {@link Reducer_CS} as reducer, and
 * {@link ScoreGroupingComparator} as grouping comparator.
 */
public class CSTop02Driver {

    /** Input directory used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "D:\\data\\score\\input";

    /** Output directory used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\data\\score\\output_top2";

    /**
     * Builds the job, removes a pre-existing output directory, runs the job
     * and exits with status 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} input path, {@code args[1]} output
     *             path; the built-in defaults are used for missing arguments
     * @throws Exception if job setup, file-system access or job execution fails
     */
    public static void main(String[] args) throws Exception {
        Path inputPath = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT_PATH);
        Path outputPath = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH);

        // Configuration object.
        Configuration conf = new Configuration();
        // Create the job instance, named after this driver class.
        Job job = Job.getInstance(conf, CSTop02Driver.class.getSimpleName());
        // Class used to locate the job jar.
        job.setJarByClass(CSTop02Driver.class);

        // Mapper and reducer classes.
        job.setMapperClass(Mapper_CS.class);
        job.setReducerClass(Reducer_CS.class);

        // Key/value types emitted by the map phase.
        job.setMapOutputKeyClass(Scorebean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Key/value types emitted by the reduce phase, i.e. the final output types.
        job.setOutputKeyClass(Scorebean.class);
        job.setOutputValueClass(NullWritable.class);

        // Custom grouping: keys with the same course form one reduce group.
        job.setGroupingComparatorClass(ScoreGroupingComparator.class);

        // Input and output locations.
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Delete the output directory if it already exists. The file system is
        // resolved from the path itself so a path with its own scheme is handled.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // Submit the job and wait for it to finish.
        boolean resultFlag = job.waitForCompletion(true);
        // Exit code reflects job success.
        System.exit(resultFlag ? 0 : 1);
    }
}

结果展示:

在这里插入图片描述

  • 2
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值