准备数据
Bob 90 64 92
Alex 64 63 68
Grace 57 86 24
Henry 39 79 78
Adair 88 82 64
Chad 66 74 37
Colin 64 86 74
Eden 71 85 43
Grover 99 86 43
序列化类
序列化时写出什么，反序列化时就必须按相同的顺序和类型读入什么
package serialscore;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Custom Hadoop {@link Writable} carrying a student's name and exam scores.
 *
 * <p>Serialization contract: {@link #readFields} must read back fields in
 * exactly the order and types that {@link #write} emitted them.
 */
public class Score implements Writable {
    // Student name; written first, so it is also read first.
    private String name = "";
    // Exam scores; initialized to an empty array so write() never sees null.
    private int[] scores = new int[0];

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int[] getScores() {
        return scores;
    }

    public void setScores(int[] scores) {
        // Null-guard so write() can always rely on scores.length.
        this.scores = (scores == null) ? new int[0] : scores;
    }

    /**
     * Serializes this record: name, then array length, then each element.
     * DataOutput has no method for writing a whole array, so the array is
     * decomposed into its length followed by the individual values.
     *
     * @param dataOutput sink supplied by the Hadoop framework
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.name);
        dataOutput.writeInt(this.scores.length);
        for (int score : this.scores) {
            dataOutput.writeInt(score);
        }
    }

    /**
     * Deserializes in exactly the order produced by {@link #write}:
     * name, length, then that many score values.
     *
     * @param dataInput source supplied by the Hadoop framework
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.name = dataInput.readUTF();
        int length = dataInput.readInt();
        this.scores = new int[length];
        for (int i = 0; i < length; i++) {
            this.scores[i] = dataInput.readInt();
        }
    }
}
Mapper类
package serialscore;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Parses one input line of the form {@code name score1 score2 ...} and emits
 * (name, Score) pairs keyed by student name.
 */
public class ScoreMapper extends Mapper<LongWritable, Text, Text, Score> {

    // Reused across map() calls to avoid allocating a new Text per record.
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // trim + \s+ tolerates leading/trailing blanks, tabs, and runs of
        // spaces; the original split(" ") produced empty tokens for those.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Skip blank lines instead of emitting a bogus empty-name record.
            return;
        }
        String[] fields = line.split("\\s+");

        Score score = new Score();
        score.setName(fields[0]);
        // Everything after the name is a numeric score.
        int[] scores = new int[fields.length - 1];
        for (int i = 1; i < fields.length; i++) {
            scores[i - 1] = Integer.parseInt(fields[i]);
        }
        score.setScores(scores);

        outKey.set(score.getName());
        context.write(outKey, score);
    }
}
Reducer类
package serialscore;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.junit.Test;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
/**
 * Averages all scores seen for one student and emits (name, "xx.xx").
 */
public class ScoreReducer extends Reducer<Text, Score, Text, Text> {

    // One reducer instance runs single-threaded, so a shared formatter is safe;
    // hoisted here to avoid re-creating it on every reduce() call.
    private final DecimalFormat df = new DecimalFormat("0.00");

    @Override
    protected void reduce(Text key, Iterable<Score> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;       // long avoids overflow on very large inputs
        int count = 0;      // total number of subjects seen
        for (Score s : values) {
            int[] scores = s.getScores();
            sum += Arrays.stream(scores).sum();
            count += scores.length;
        }
        if (count == 0) {
            // No scores for this key: nothing to average, and dividing would
            // throw ArithmeticException.
            return;
        }
        // Cast before dividing: the original sum/count was integer division,
        // which truncated the average before formatting (e.g. 82/3 -> 27.00).
        double average = (double) sum / count;
        context.write(key, new Text(df.format(average)));
    }
}
Driver
package serialscore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Configures and submits the score-averaging MapReduce job.
 */
public class ScoreDriver {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(ScoreDriver.class);
        job.setMapperClass(ScoreMapper.class);
        job.setReducerClass(ScoreReducer.class);

        // Map output (Text, Score) differs from the final (Text, Text)
        // output, so both pairs must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Score.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://hadoop01:9000/txt/score.txt"));
        // NOTE: the output directory must not already exist or the job fails.
        FileOutputFormat.setOutputPath(job, new Path("hdfs://hadoop01:9000/result/ruanwenfu"));

        // Propagate the job result as the process exit code; the original
        // discarded it, so failures still exited with status 0.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}