[Hadoop Learning Project] 3. Computing an Average + Using a Combiner

Without a combiner

0. Project structure

(project structure screenshot)
Goal: compute each student's average score.

1. AvgDriver.java

package hadoop_test.avg_demo_03;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvgDriver {
/* Input data
tom 69
tom 88
tom 78
jary 109
jary 90
jary 81
jary 35
rose 23
rose 100
rose 230
 */
    public static void main(String[] args) throws Exception {

        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(AvgDriver.class);
        job.setMapperClass(AvgMapper.class);
        job.setReducerClass(AvgReducer.class);

        // map output types: student name -> single score
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // final output types: student name -> average score
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.setInputPaths(job, new Path("/hadoop_test/avg/avg.txt"));
        // note: the output directory must not already exist, or the job fails at startup
        FileOutputFormat.setOutputPath(job, new Path("/hadoop_test/avg/result"));
        job.waitForCompletion(true);
    }
}

2. AvgMapper

package hadoop_test.avg_demo_03;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class AvgMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // sample input line: "tom 69"
        String line = value.toString();
        String[] fields = line.split(" ");
        // first field becomes the key, e.g. "tom"
        String outKey = fields[0];
        // second field becomes the value; parse the text into an int, e.g. 69
        int outValue = Integer.parseInt(fields[1]);

        context.write(new Text(outKey), new IntWritable(outValue));
    }
}

3. AvgReducer

package hadoop_test.avg_demo_03;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class AvgReducer  extends Reducer<Text,IntWritable,Text,DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int flag = 0;             // number of exams
        int count = 0;            // running total of scores
        for (IntWritable value : values) {
            count += value.get();
            flag += 1;
        }
        // cast before dividing: integer division would truncate the average
        double re = (double) count / flag;

        context.write(key, new DoubleWritable(re));
    }
}
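
For reference, with the sample data listed in AvgDriver (and the cast fix above), the job should write one line per student to /hadoop_test/avg/result; keys arrive sorted, and TextOutputFormat separates key and value with a tab:

jary	78.75
rose	117.66666666666667
tom	78.33333333333333

The exact decimal rendering comes from DoubleWritable, which prints via Double.toString.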

With a combiner

0. Project structure

(project structure screenshot)

1. AvgDriver

package hadoop_test.avg_hmk_03;

import hadoop_test.Utils_hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvgDriver {
/* Input data
tom 69
tom 88
tom 78
jary 109
jary 90
jary 81
jary 35
rose 23
rose 100
rose 230
 */
    public static void main(String[] args) throws Exception {

        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(AvgDriver.class);

        job.setMapperClass(AvgMapper.class);
        job.setCombinerClass(AvgCombine.class);
        job.setReducerClass(AvgReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);         // note: the map output value is now Text
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.setInputPaths(job, new Path("/hadoop_test/avg/avg.txt"));
        // delete the output directory if it already exists, so the job can be rerun
        if (Utils_hadoop.testExist(conf, "/hadoop_test/avg/homework_result")) {
            Utils_hadoop.rmDir(conf, "/hadoop_test/avg/homework_result");
        }
        FileOutputFormat.setOutputPath(job, new Path("/hadoop_test/avg/homework_result"));

        job.waitForCompletion(true);
    }
}
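
Utils_hadoop is a helper class from this project and its source is not shown here. A minimal equivalent of the two calls above, using only the standard HDFS FileSystem API, might look like this (a sketch under that assumption, not the project's actual implementation):

package hadoop_test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical stand-in for Utils_hadoop: checks for and removes an
// existing output directory so FileOutputFormat does not abort the job.
public class OutputDirUtil {
    public static boolean testExist(Configuration conf, String dir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(dir));
    }

    public static void rmDir(Configuration conf, String dir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(dir), true);   // true = delete recursively
    }
}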

2. AvgMapper

package hadoop_test.avg_hmk_03;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class AvgMapper extends Mapper<LongWritable,Text,Text,Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // sample input line: "tom 69"
        String line = value.toString();
        String[] fields = line.split(" ");
        // first field becomes the key, e.g. "tom"
        String outKey = fields[0];
        // parse the score to validate it, e.g. 69; it is re-emitted as text
        int outValue = Integer.parseInt(fields[1]);
        System.out.println(outKey + ":" + outValue);    // debug output

        context.write(new Text(outKey), new Text(String.valueOf(outValue)));
    }
}

Note: the map output value is changed to Text. The combiner emits records in the form name : totalScore_examCount, and since a combiner's input and output key/value types must both match the map output types, the map output value has to be Text as well.

3. AvgCombine

package hadoop_test.avg_hmk_03;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;


public class AvgCombine extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int num = 0;              // number of exams
        int count = 0;            // running total of scores
        for (Text value : values) {
            count += Integer.parseInt(value.toString());
            num += 1;
        }
        System.out.println(key + ":" + count + "_" + num);    // debug output
        // summed partial averages would not equal the overall average, so pass
        // "sum_count" (e.g. tom 235_3) through to the reducer instead
        context.write(key, new Text(count + "_" + num));
    }
}

The combiner extends Reducer: it runs during the shuffle phase, after the map-side sort, as a local reduce over each map task's output. Its accumulation logic is the same as the reducer's; the main difference is where its input comes from and where its output goes.

The framework groups all values that share a key into the Iterable<>, and the for loop then accumulates both the sum and the count.

Note: the average must be computed as the total sum divided by the total count. Averaging partial averages does not in general reproduce the true mean: splitting tom's scores {69, 88, 78} into {69, 88} and {78} gives partial averages 78.5 and 78.0, whose mean is 78.25, not the true average 235/3 ≈ 78.33. That is why the combiner forwards "sum_count" rather than a partial average.
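
One caveat worth knowing: Hadoop treats the combiner as an optional optimization and may apply it zero, one, or several times, including to output that has already been combined. AvgCombine above parses raw scores, so a second pass over "sum_count" strings would throw a NumberFormatException. A more defensive variant, sketched below under the assumption that the mapper emits each score as a "score_1" pair (e.g. "69_1"), consumes and produces the same format and therefore stays correct however often it runs:

package hadoop_test.avg_hmk_03;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// Hypothetical re-entrant combiner: input and output both use the
// "sum_count" format, so repeated application cannot corrupt the data.
// Assumes the mapper writes values such as "69_1" instead of "69".
public class AvgCombineReentrant extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int count = 0;    // running total of scores
        int num = 0;      // running total of exam counts
        for (Text value : values) {
            String[] parts = value.toString().split("_");
            count += Integer.parseInt(parts[0]);
            num += Integer.parseInt(parts[1]);
        }
        context.write(key, new Text(count + "_" + num));
    }
}

With that change the reducer below needs no modification, since it already parses "sum_count" pairs, and it still works when the framework skips the combiner entirely.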

4. AvgReducer

package hadoop_test.avg_hmk_03;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class AvgReducer  extends Reducer<Text,Text,Text,DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int num = 0;              // total number of exams
        int count = 0;            // running total of scores
        for (Text value : values) {
            // each value arrives as "sum_count", e.g. "235_3"
            String[] parts = value.toString().split("_");
            count += Integer.parseInt(parts[0]);
            num += Integer.parseInt(parts[1]);
        }
        // cast before dividing: integer division would truncate the average
        double re = (double) count / num;

        context.write(key, new DoubleWritable(re));
    }
}
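
If everything is wired correctly, the files under /hadoop_test/avg/homework_result should match the no-combine run exactly; a combiner may only change where the partial summing happens, never the final result:

jary	78.75
rose	117.66666666666667
tom	78.33333333333333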
