MapReduce Basics Test (1)
Field description: store name, revenue, expenses, year
劲松店,600,350,2019年
劲松店,800,250,2020年
王府井店,1900,600,2020年
王府井店,2000,900,2019年
回龙观店,6700,1800,2020年
西单店,3000,1000,2019年
西单店,5000,1000,2020年
,3500,1000,2020年
牡丹园店,3800,1400,2020
牡丹园店,2800,1300,2019年
西直门店,1500,900,2019年
太阳宫店,9000,3600,2019年
三里屯店,,1000,2020年
西直门店,3500,1000,2020年
太阳宫店,6000,4600,2020年
回龙观店,7500,2000,2019年
Requirement 1: remove the records with missing fields from the source file
Requirement 2: split the business records into different files by year
Requirement 3: sort each year's business records by net profit (revenue - expenses)
Requirement 4: separate the fields written to the final output file with '\t', and append two extra fields: the net profit and a profit/loss flag
For example:
王府井店 1900 600 2020年 1300 盈利
劲松店 800 950 2020年 -150 亏损
Preparation
Create a test26.txt file and store the data above in it
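Before running the jobs, the sample records have to be on disk at the path that Test01 reads. The snippet below is only a minimal sketch, assuming the same local Windows path that is hardcoded in the code (C:\Users\User\Desktop\input\test26.txt); creating the file by hand works just as well.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
public class WriteTestData {
    public static void main(String[] args) throws IOException {
        // The sample records from the table above; the dirty rows are kept on purpose
        List<String> lines = Arrays.asList(
                "劲松店,600,350,2019年",
                "劲松店,800,250,2020年",
                "王府井店,1900,600,2020年",
                ",3500,1000,2020年",
                "三里屯店,,1000,2020年"
                // ... the remaining records from the table above ...
        );
        // Assumed local path; it must match the input path used in Test01
        Files.createDirectories(Paths.get("C:\\Users\\User\\Desktop\\input"));
        Files.write(Paths.get("C:\\Users\\User\\Desktop\\input\\test26.txt"),
                lines, StandardCharsets.UTF_8);
    }
}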
Implementation code for requirement 1:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 * Requirement 1: remove the records with missing fields from the source file
 *
 */
public class Test01 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test01(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test01.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\input\\test26.txt"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// job.setPartitionerClass(HashPartitioner.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
// job.setReducerClass(ReduceWordCount.class);
// job.setOutputKeyClass(Text.class);
// job.setOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest26");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
//job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
Text outputKey = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
// Keep the record only when all four fields are present and non-empty;
// checking the length first also protects against rows with fewer than four fields
if (fields.length == 4 && !fields[0].isEmpty() && !fields[1].isEmpty()
        && !fields[2].isEmpty() && !fields[3].isEmpty()) {
    outputKey.set(fields[0] + " " + fields[1] + " " + fields[2] + " " + fields[3]);
    context.write(outputKey, NullWritable.get());
}
}
}
}
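A possible refinement, not in the original code: a Hadoop counter can report how many dirty records were dropped, which makes it easy to verify requirement 1 from the job summary that waitForCompletion(true) prints. A sketch of the map() method with such a counter, assuming the split-once version shown above:
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] fields = value.toString().split(",");
    if (fields.length == 4 && !fields[0].isEmpty() && !fields[1].isEmpty()
            && !fields[2].isEmpty() && !fields[3].isEmpty()) {
        outputKey.set(fields[0] + " " + fields[1] + " " + fields[2] + " " + fields[3]);
        context.write(outputKey, NullWritable.get());
    } else {
        // Count every record that is filtered out; the total appears in the job counters
        context.getCounter("DataClean", "DroppedRecords").increment(1);
    }
}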
Implementation code for requirement 2:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 *
 * Requirement 2: split the business records into different files by year
 */
public class Test02 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test02(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test02.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setPartitionerClass(MRPartition.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
job.setReducerClass(ReduceWordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest2602");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
job.setNumReduceTasks(2);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value, NullWritable.get());
}
}
public static class MRPartition extends Partitioner<Text,NullWritable>{
@Override
public int getPartition(Text k2, NullWritable v2, int i) {
String year = k2.toString().split("\\s+")[3];
if ("2019年".equals(year)){
return 0;
}else {
return 1;
}
}
}
public static class ReduceWordCount extends Reducer<Text, NullWritable, Text, NullWritable> {
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
context.write(key,NullWritable.get());
}
}
}
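MRPartition hardcodes partition 0 for 2019年 and sends everything else to partition 1, which matches setNumReduceTasks(2) but would silently lump any other year together with 2020年. A hedged alternative (an assumption, not part of the original solution) is to hash the year so the partitioner works for any number of years and reduce tasks; the trade-off is that hashing no longer guarantees exactly one year per output file if two years land in the same partition.
public static class YearHashPartition extends Partitioner<Text, NullWritable> {
    @Override
    public int getPartition(Text k2, NullWritable v2, int numReduceTasks) {
        // Partition by the year field (index 3); mask the sign bit so the result is never negative
        String year = k2.toString().split("\\s+")[3];
        return (year.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}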
Implementation code for requirement 3:
1. Create the JavaBean
Analysis: add one extra field for the net profit
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class Bean implements WritableComparable<Bean> {
private String first;
private String second;
private String three;
private String years;
private String money;
public Bean() {
}
public String getFirst() {
return first;
}
public void setFirst(String first) {
this.first = first;
}
public String getSecond() {
return second;
}
public void setSecond(String second) {
this.second = second;
}
public String getThree() {
return three;
}
public void setThree(String three) {
this.three = three;
}
public String getYears() {
return years;
}
public void setYears(String years) {
this.years = years;
}
public String getMoney() {
return money;
}
public void setMoney(String money) {
this.money = money;
}
@Override
public String toString() {
return first + "\t" + second +"\t"+ three +"\t"+ years+"\t"+money ;
}
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(first);
dataOutput.writeUTF(second);
dataOutput.writeUTF(three);
dataOutput.writeUTF(years);
dataOutput.writeUTF(money);
}
public void setAll(String first,String second,String three,String years,String money){
this.setFirst(first);
this.setSecond(second);
this.setThree(three);
this.setYears(years);
this.setMoney(money);
}
public void readFields(DataInput dataInput) throws IOException {
this.first = dataInput.readUTF();
this.second = dataInput.readUTF();
this.three = dataInput.readUTF();
this.years = dataInput.readUTF();
this.money=dataInput.readUTF();
}
@Override
public int compareTo(Bean o) {
    // Group records by year first so that each year's data sorts together
    int comp = this.years.compareTo(o.years);
    if (comp != 0) {
        return comp;
    }
    // Within the same year, sort by net profit in descending order
    comp = -Integer.valueOf(this.money).compareTo(Integer.valueOf(o.money));
    if (comp != 0) {
        return comp;
    }
    // Tie-break on the store name so distinct records with equal profit are not merged into one reduce group
    return this.first.compareTo(o.first);
}
}
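A quick standalone check (a sketch for local testing only, not part of the MapReduce job) of the ordering that compareTo defines: records group by year, and within a year they sort by net profit in descending order. The sample values are taken from the data above.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class BeanSortCheck {
    public static void main(String[] args) {
        Bean a = new Bean(); a.setAll("王府井店", "1900", "600", "2020年", "1300");
        Bean b = new Bean(); b.setAll("劲松店", "800", "250", "2020年", "550");
        Bean c = new Bean(); c.setAll("西单店", "3000", "1000", "2019年", "2000");
        List<Bean> beans = new ArrayList<Bean>();
        beans.add(a); beans.add(b); beans.add(c);
        Collections.sort(beans); // uses Bean.compareTo
        for (Bean bean : beans) {
            // Expected order: the 2019年 record first, then the 2020年 records with 1300 before 550
            System.out.println(bean);
        }
    }
}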
2. Driver code
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 *
 * Requirement 3: sort each year's business records by net profit (revenue - expenses)
 */
public class Test03 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test03(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test03.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Bean.class);
job.setMapOutputValueClass(NullWritable.class);
// job.setPartitionerClass(HashPartitioner.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
job.setReducerClass(ReduceWordCount.class);
job.setOutputKeyClass(Bean.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest2603");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
//job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Bean, NullWritable> {
Bean bean=new Bean();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Split the cleaned record once; the fields are space-separated in the requirement-1 output
String[] fields = value.toString().split("\\s+");
String first = fields[0];
String second = fields[1];
String three = fields[2];
String year = fields[3];
// Net profit = revenue - expenses, carried as an extra field of the Bean
int money = Integer.parseInt(second) - Integer.parseInt(three);
String moneyStr = String.valueOf(money);
bean.setAll(first,second,three,year,moneyStr);
context.write(bean,NullWritable.get());
}
}
public static class ReduceWordCount extends Reducer<Bean, NullWritable, Bean, NullWritable> {
@Override
protected void reduce(Bean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
context.write(key,NullWritable.get());
}
}
}
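Requirement 3 as written produces one globally sorted output. If the sorted data should also be split per year (combining requirements 2 and 3), a Bean-keyed partitioner could be added to Test03, along the lines of MRPartition in Test02. A minimal sketch, assuming it is added as another static inner class of Test03 and that org.apache.hadoop.mapreduce.Partitioner is imported:
public static class YearBeanPartition extends Partitioner<Bean, NullWritable> {
    @Override
    public int getPartition(Bean k2, NullWritable v2, int numReduceTasks) {
        // Same rule as requirement 2: 2019年 goes to reducer 0, everything else to reducer 1
        return "2019年".equals(k2.getYears()) ? 0 : 1;
    }
}
// and in run():
//   job.setPartitionerClass(YearBeanPartition.class);
//   job.setNumReduceTasks(2);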
Implementation code for requirement 4:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 * Requirement 4: separate the fields written to the final output file with '\t',
 * and append two extra fields: the net profit and a profit/loss flag
 * For example:
 * 王府井店 1900 600 2020年 1300 盈利
 * 劲松店 800 950 2020年 -150 亏损
 *
 */
public class Test04 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test04(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test04.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// job.setPartitionerClass(HashPartitioner.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
job.setReducerClass(ReduceWordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest2604");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
//job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value,NullWritable.get());
}
}
public static class ReduceWordCount extends Reducer<Text, NullWritable, Text, NullWritable> {
Text outputKey = new Text();
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
// Re-split the cleaned record and compute the net profit
String[] fields = key.toString().split("\\s+");
int money = Integer.parseInt(fields[1]) - Integer.parseInt(fields[2]);
// Flag the record as profit or loss
String flag = money < 0 ? "亏损" : "盈利";
// Requirement 4: every field in the output line is separated by '\t'
outputKey.set(fields[0] + "\t" + fields[1] + "\t" + fields[2] + "\t" + fields[3] + "\t" + money + "\t" + flag);
context.write(outputKey,NullWritable.get());
}
}
}
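All four jobs hardcode local Windows paths, so they only run as-is on the machine they were written on. Since every driver already goes through ToolRunner, a small change lets the input and output paths come from the command line instead. A hedged sketch of what Test01's run() could look like with that change (the other jobs would follow the same pattern); the launch command is an assumption, e.g. hadoop jar test.jar Test01 <input> <output>:
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(this.getConf(), "test26");
    job.setJarByClass(Test01.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path(args[0])); // input path from the command line
    job.setMapperClass(MapWordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    Path out = new Path(args[1]); // output path from the command line
    FileSystem fs = FileSystem.get(this.getConf());
    if (fs.exists(out)) {
        fs.delete(out, true); // remove the output of a previous run
    }
    TextOutputFormat.setOutputPath(job, out);
    return job.waitForCompletion(true) ? 0 : -1;
}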