MapReduce Basics Test (Part 1)


Field layout: store name, revenue, expenses, year
劲松店,600,350,2019年
劲松店,800,250,2020年
王府井店,1900,600,2020年
王府井店,2000,900,2019年
回龙观店,6700,1800,2020年
西单店,3000,1000,2019年
西单店,5000,1000,2020年
,3500,1000,2020年
牡丹园店,3800,1400,2020
牡丹园店,2800,1300,2019年
西直门店,1500,900,2019年
太阳宫店,9000,3600,2019年
三里屯店,,1000,2020年
西直门店,3500,1000,2020年
太阳宫店,6000,4600,2020年
回龙观店,7500,2000,2019年
Requirement 1: remove records with missing fields from the source file
Requirement 2: split the business records into different files by year
Requirement 3: sort each year's records by net profit (revenue - expenses)
Requirement 4: in the final output file, separate the fields with '\t' and append two descriptive fields: the net profit amount and a profit/loss flag

For example:

王府井店 1900 600 2020年 1300 盈利
劲松店 800 950 2020年 -150 亏损

Preparation

Create a file named test26.txt and put the data above into it.
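For convenience, the file can also be generated from code; a minimal sketch (the class name MakeTestData is made up, and the path matches the hard-coded input path used by the jobs below):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;

public class MakeTestData {
    public static void main(String[] args) throws Exception {
        // Writes the sample records, including the deliberately dirty rows,
        // to the path the jobs below read from.
        Files.write(Paths.get("C:\\Users\\User\\Desktop\\input\\test26.txt"),
                Arrays.asList(
                        "劲松店,600,350,2019年",
                        "劲松店,800,250,2020年",
                        "王府井店,1900,600,2020年",
                        "王府井店,2000,900,2019年",
                        "回龙观店,6700,1800,2020年",
                        "西单店,3000,1000,2019年",
                        "西单店,5000,1000,2020年",
                        ",3500,1000,2020年",
                        "牡丹园店,3800,1400,2020",
                        "牡丹园店,2800,1300,2019年",
                        "西直门店,1500,900,2019年",
                        "太阳宫店,9000,3600,2019年",
                        "三里屯店,,1000,2020年",
                        "西直门店,3500,1000,2020年",
                        "太阳宫店,6000,4600,2020年",
                        "回龙观店,7500,2000,2019年"),
                StandardCharsets.UTF_8);
    }
}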

Requirement 1 implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Field layout: store name, revenue, expenses, year
 * Requirement 1: remove records with missing fields from the source file
 */
public class Test01 extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new Test01(), args);
        System.exit(status);
    }

    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), "test26");
        job.setJarByClass(Test01.class);

        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\input\\test26.txt"));

        job.setMapperClass(MapWordCount.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

//        job.setPartitionerClass(HashPartitioner.class);
//        job.setSortComparatorClass(null);
//        job.setGroupingComparatorClass(null);
//        job.setCombinerClass(null);

//        job.setReducerClass(ReduceWordCount.class);
//        job.setOutputKeyClass(Text.class);
//        job.setOutputValueClass(IntWritable.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        Path path = new Path("C:\\Users\\User\\Desktop\\outTest26");
        FileSystem fs = FileSystem.get(this.getConf());
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        TextOutputFormat.setOutputPath(job, path);

        //job.setNumReduceTasks(1);
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
        Text outputKey = new Text();
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split once instead of re-splitting for every field.
            String[] fields = value.toString().split(",");
            // Keep only records with all four fields present and non-empty.
            // split(",") drops trailing empty strings, so a missing trailing
            // field also shows up as fields.length < 4.
            if (fields.length == 4
                    && !fields[0].isEmpty() && !fields[1].isEmpty()
                    && !fields[2].isEmpty() && !fields[3].isEmpty()) {
                outputKey.set(fields[0] + " " + fields[1] + " " + fields[2] + " " + fields[3]);
                context.write(outputKey, NullWritable.get());
            }
        }
    }

//    public static class ReduceWordCount extends Reducer<Text, IntWritable, Text, IntWritable> {
//        Text outputKey = new Text();
//        IntWritable outputValue = new IntWritable();
//        @Override
//        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
//            int sum=0;
//            for (IntWritable value : values) {
//                sum+=value.get();
//            }
//            outputKey.set(key);
//            outputValue.set(sum);
//            context.write(outputKey,outputValue);
//        }
//    }

}
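Optionally, the mapper can also count how many malformed rows were dropped; a minimal sketch using a Hadoop counter (the group and counter names "DataQuality"/"DroppedRows" are made up for illustration):

            // Inside MapWordCount.map(), replacing the bare if-block above:
            boolean valid = fields.length == 4
                    && !fields[0].isEmpty() && !fields[1].isEmpty()
                    && !fields[2].isEmpty() && !fields[3].isEmpty();
            if (valid) {
                outputKey.set(String.join(" ", fields));
                context.write(outputKey, NullWritable.get());
            } else {
                // Counters appear in the job's console summary after completion.
                context.getCounter("DataQuality", "DroppedRows").increment(1);
            }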


Requirement 2 implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Field layout: store name, revenue, expenses, year
 *
 * Requirement 2: split the business records into different files by year
 */
public class Test02 extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new Test02(), args);
        System.exit(status);
    }

    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), "test26");
        job.setJarByClass(Test02.class);

        job.setInputFormatClass(TextInputFormat.class);
        // Input is the cleaned output directory produced by Requirement 1.
        TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));

        job.setMapperClass(MapWordCount.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setPartitionerClass(MRPartition.class);
//        job.setSortComparatorClass(null);
//        job.setGroupingComparatorClass(null);
//        job.setCombinerClass(null);

        job.setReducerClass(ReduceWordCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        Path path = new Path("C:\\Users\\User\\Desktop\\outTest2602");
        FileSystem fs = FileSystem.get(this.getConf());
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        TextOutputFormat.setOutputPath(job, path);
        job.setNumReduceTasks(2);
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Records were already cleaned by Requirement 1; pass them through
            // so the partitioner can route them by year.
            context.write(value, NullWritable.get());
        }
    }
    public static class MRPartition extends Partitioner<Text, NullWritable> {
        @Override
        public int getPartition(Text k2, NullWritable v2, int numPartitions) {
            // The cleaned records are space-separated; field 3 is the year.
            String year = k2.toString().split("\\s+")[3];
            // Partition 0 holds the 2019 records, partition 1 everything else (2020).
            if ("2019年".equals(year)) {
                return 0;
            } else {
                return 1;
            }
        }
    }
    public static class ReduceWordCount extends Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key,NullWritable.get());
        }
    }

}
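For reference, the same per-year split could also be done with Hadoop's MultipleOutputs instead of a custom Partitioner plus two reducers; a minimal reducer sketch (the named outputs y2019/y2020 are illustrative; named output names must be alphanumeric):

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public static class SplitByYearReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        String year = key.toString().split("\\s+")[3];
        // Route each record to a per-year named output file.
        mos.write("2019年".equals(year) ? "y2019" : "y2020", key, NullWritable.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}

// Registered in run() before submission:
// MultipleOutputs.addNamedOutput(job, "y2019", TextOutputFormat.class, Text.class, NullWritable.class);
// MultipleOutputs.addNamedOutput(job, "y2020", TextOutputFormat.class, Text.class, NullWritable.class);

With this approach a single reducer suffices, and adding LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class) avoids creating empty default part files.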


Requirement 3 implementation:
1. Create a JavaBean

Analysis: add one extra field holding the net profit.

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Bean implements WritableComparable<Bean> {
    private String first;
    private String second;
    private String three;
    private String years;
    private String money;

    public Bean() {
    }

    public String getFirst() {
        return first;
    }

    public void setFirst(String first) {
        this.first = first;
    }

    public String getSecond() {
        return second;
    }

    public void setSecond(String second) {
        this.second = second;
    }

    public String getThree() {
        return three;
    }

    public void setThree(String three) {
        this.three = three;
    }

    public String getYears() {
        return years;
    }

    public void setYears(String years) {
        this.years = years;
    }

    public String getMoney() {
        return money;
    }

    public void setMoney(String money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return first + "\t" + second + "\t" + three + "\t" + years + "\t" + money;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(first);
        dataOutput.writeUTF(second);
        dataOutput.writeUTF(three);
        dataOutput.writeUTF(years);
        dataOutput.writeUTF(money);
    }

    // Convenience setter that fills all five fields at once.
    public void setAll(String first, String second, String three, String years, String money) {
        this.setFirst(first);
        this.setSecond(second);
        this.setThree(three);
        this.setYears(years);
        this.setMoney(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.first = dataInput.readUTF();
        this.second = dataInput.readUTF();
        this.three = dataInput.readUTF();
        this.years = dataInput.readUTF();
        this.money = dataInput.readUTF();
    }


    @Override
    public int compareTo(Bean o) {
        // Order by year first, then by net profit descending within each year
        // (Requirement 3).
        int comp = this.years.compareTo(o.years);
        if (comp == 0) {
            return Integer.compare(Integer.parseInt(o.money), Integer.parseInt(this.money));
        }
        return comp;
    }
}
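To sanity-check the ordering locally before submitting a job, the Bean's compareTo can be exercised with a plain Collections.sort; a minimal sketch (the class name BeanSortDemo is made up; it only needs the Hadoop client jars on the classpath for the WritableComparable import):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class BeanSortDemo {
    public static void main(String[] args) {
        List<Bean> beans = new ArrayList<>();
        Bean a = new Bean(); a.setAll("劲松店", "600", "350", "2019年", "250");
        Bean b = new Bean(); b.setAll("回龙观店", "6700", "1800", "2020年", "4900");
        Bean c = new Bean(); c.setAll("太阳宫店", "9000", "3600", "2019年", "5400");
        Collections.addAll(beans, a, b, c);
        // Sorts by year, then by net profit descending, per compareTo above.
        Collections.sort(beans);
        for (Bean bean : beans) {
            // Prints 太阳宫店 (2019, 5400), 劲松店 (2019, 250), 回龙观店 (2020, 4900).
            System.out.println(bean);
        }
    }
}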

2. Main driver code
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Field layout: store name, revenue, expenses, year
 *
 * Requirement 3: sort each year's records by net profit (revenue - expenses)
 */
public class Test03 extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new Test03(), args);
        System.exit(status);
    }

    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), "test26");
        job.setJarByClass(Test03.class);

        job.setInputFormatClass(TextInputFormat.class);
        // Input is the cleaned output directory produced by Requirement 1.
        TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));

        job.setMapperClass(MapWordCount.class);
        job.setMapOutputKeyClass(Bean.class);
        job.setMapOutputValueClass(NullWritable.class);

//        job.setPartitionerClass(HashPartitioner.class);
//        job.setSortComparatorClass(null);
//        job.setGroupingComparatorClass(null);
//        job.setCombinerClass(null);

        job.setReducerClass(ReduceWordCount.class);
        job.setOutputKeyClass(Bean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        Path path = new Path("C:\\Users\\User\\Desktop\\outTest2603");
        FileSystem fs = FileSystem.get(this.getConf());
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        TextOutputFormat.setOutputPath(job, path);

        //job.setNumReduceTasks(1);
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static class MapWordCount extends Mapper<LongWritable, Text, Bean, NullWritable> {
        // Reused across map() calls; safe because write() serializes immediately.
        Bean bean = new Bean();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Cleaned records are space-separated: store, revenue, expenses, year.
            String[] fields = value.toString().split("\\s+");
            // Net profit = revenue - expenses, carried as the Bean's fifth field.
            int money = Integer.parseInt(fields[1]) - Integer.parseInt(fields[2]);
            bean.setAll(fields[0], fields[1], fields[2], fields[3], String.valueOf(money));
            context.write(bean, NullWritable.get());
        }
    }


    public static class ReduceWordCount extends Reducer<Bean, NullWritable, Bean, NullWritable> {
        @Override
        protected void reduce(Bean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key,NullWritable.get());
        }
    }

}


Requirement 4 implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Field layout: store name, revenue, expenses, year
 * Requirement 4: separate the output fields with '\t' and append two
 * descriptive fields: the net profit amount and a 盈利 (profit) / 亏损 (loss) flag.
 * For example:
 * 王府井店    1900    600    2020年    1300    盈利
 * 劲松店      800     950    2020年    -150    亏损
 */
public class Test04 extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new Test04(), args);
        System.exit(status);
    }

    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), "test26");
        job.setJarByClass(Test04.class);

        job.setInputFormatClass(TextInputFormat.class);
        // Input is the cleaned output directory produced by Requirement 1.
        TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));

        job.setMapperClass(MapWordCount.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

//        job.setPartitionerClass(HashPartitioner.class);
//        job.setSortComparatorClass(null);
//        job.setGroupingComparatorClass(null);
//        job.setCombinerClass(null);

        job.setReducerClass(ReduceWordCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        Path path = new Path("C:\\Users\\User\\Desktop\\outTest2604");
        FileSystem fs = FileSystem.get(this.getConf());
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        TextOutputFormat.setOutputPath(job, path);

        //job.setNumReduceTasks(1);
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Pass the cleaned records straight through to the reducer.
            context.write(value, NullWritable.get());
        }
    }


    public static class ReduceWordCount extends Reducer<Text, NullWritable, Text, NullWritable> {
        Text outputKey = new Text();

        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Cleaned records are space-separated: store, revenue, expenses, year.
            String[] fields = key.toString().split("\\s+");
            int money = Integer.parseInt(fields[1]) - Integer.parseInt(fields[2]);
            String flag = money < 0 ? "亏损" : "盈利";
            // Requirement 4 wants '\t' between all fields, so rebuild the record
            // with tabs rather than reusing the space-separated key.
            outputKey.set(String.join("\t", fields) + "\t" + money + "\t" + flag);
            context.write(outputKey, NullWritable.get());
        }
    }

}
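Since Requirements 2 through 4 all read the cleaned output directory of Requirement 1 (outTest26), the four jobs can be chained from one driver; a minimal sketch (the class name RunAll is made up; it assumes all four Test classes are on the classpath and relies on the hard-coded paths above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class RunAll {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Requirement 1 must succeed first: the other jobs read its output.
        if (ToolRunner.run(conf, new Test01(), args) != 0) {
            System.exit(1);
        }
        ToolRunner.run(conf, new Test02(), args);
        ToolRunner.run(conf, new Test03(), args);
        ToolRunner.run(conf, new Test04(), args);
    }
}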

