MapReduce Examples


Write the code as you learn


WordCount

1. Requirement

  Count and output the total number of occurrences of each word in a given text file.
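For example (a hypothetical illustration, since the original screenshot is not reproduced here), given an input file containing

hello world
hello hadoop
hadoop mapreduce

the expected output is

hadoop	2
hello	2
mapreduce	1
world	1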

Following the requirement, we write the Mapper, Reducer, and Driver in turn.

2. Environment Setup

IDEA + JDK 17 + the Maven bundled with IDEA

  1. Create the project
    Open IDEA, create a new project, and choose Maven.
    IDEA will automatically download the required Maven files; the progress shows in the bottom-right corner, so wait for the download to finish.

  2. Import dependencies
    Paste the following into pom.xml (dependencies belong in pom.xml, not the .iml file):

 <dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.2</version>
    </dependency>
 </dependencies>

Also paste the following into pom.xml, inside the <build><plugins> ... </plugins></build> section:

<plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>
              jar-with-dependencies
            </descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
  3. Configure log4j
    Under the main directory, create a resources directory.

    In the resources directory, create a file named log4j.properties
    and paste in the following:

log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d	%p	[%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d	%p	[%c] - %m%n
  4. Under main/java, create the package com.mapreduce.wordcount

3. Mapper

Purpose: split the input into words and emit (word, 1) key-value pairs.
Create a Java class, WordCountMapper.

Extend the Mapper base class, and go read Mapper's source code in detail.
Be sure to write it out yourself at least once.
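
As a rough guide, here is the run() method of Hadoop 2.x's Mapper base class: the framework calls setup() once, then map() once per input record, then cleanup(). This is why overriding map() alone is enough for most jobs.

public void run(Context context) throws IOException, InterruptedException {
    setup(context);                       // once, before any records
    try {
        while (context.nextKeyValue()) {  // one iteration per input record
            map(context.getCurrentKey(), context.getCurrentValue(), context);
        }
    } finally {
        cleanup(context);                 // once, after all records
    }
}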

package com.mapreduce.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author Tancy
 * @date 2021/10/21
 * @time 11:02
 **/
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    Text k = new Text();
    IntWritable v = new IntWritable(1);
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Convert the Text to a String
        String line = value.toString();
        // 2. Split the line on spaces
        String[] words = line.split(" ");
        // 3. Emit (word, 1) for each word
        for (String word : words){
            k.set(word);
            context.write(k,v);
        }
    }
}
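
For example, with the default TextInputFormat the line "hello world hello" arrives as value (the key is the line's byte offset in the file), and this map() emits (hello, 1), (world, 1), (hello, 1); the framework then groups the pairs by key before they reach the reducer.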

4. Reducer

Purpose: aggregate the per-word counts.
Create a Java class, WordCountReducer.
Extend the Reducer base class.

package com.mapreduce.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @author Tancy
 * @date 2021/10/21
 * @time 13:09
 **/
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable v =new IntWritable();
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // 1. Sum the counts for this key
        int sum = 0;
        for(IntWritable value:values){
            sum += value.get();
        }
        // 2. Write out the total
        v.set(sum);
        context.write(key,v);
    }
}
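
Continuing the example above, after the shuffle groups the mapper output by key, the reducer receives (hello, [1, 1]), sums the list, and writes (hello, 2).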

5. Driver

Eight steps.
Pure boilerplate; every driver looks the same.

package com.mapreduce.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @author Tancy
 * @date 2021/10/21
 * @time 14:12
 **/
public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Set up the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        //2. set jar path
        job.setJarByClass(WordCountDriver.class); // lets Hadoop locate the jar containing this class

        //3. connect Mapper and Reducer
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        //4. set type of Mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        //5.set type of final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        //6. set input path
        FileInputFormat.setInputPaths(job,new Path(args[0]));

        //7. set output path
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        //8. submit
        boolean res = job.waitForCompletion(true);
        System.exit(res?0:1);
    }
}


Package the project (mvn package) to get a wordcount jar; upload it to the virtual machine and run it.
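
A typical submission looks like the following (the jar name and HDFS paths are illustrative; substitute your own). Note that the output directory must not exist beforehand, or the job fails.

hadoop jar wordcount-1.0-SNAPSHOT.jar com.mapreduce.wordcount.WordCountDriver /user/input /user/output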

Serialization

1. Requirement

For each phone number in a traffic log (sample input in section 6 below), compute the total upstream traffic, total downstream traffic, and overall total. Because the value now carries several fields, we wrap them in a custom bean that implements Hadoop's Writable interface.

2. FlowBean

package com.mapreduce;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements Writable {
    private long upFlow;   // upstream traffic
    private long downFlow; // downstream traffic
    private long sumFlow;  // total traffic

    // no-arg constructor, required so Hadoop can instantiate the bean reflectively during deserialization
    public FlowBean() {
        super();
    }

    public FlowBean(long upFlow, long downFlow) {
        super();
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    @Override
    public String toString() {
        return this.upFlow + "\t" + this.downFlow + "\t" + this.sumFlow;
    }

    // serialization method
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upFlow);
        dataOutput.writeLong(this.downFlow);
        dataOutput.writeLong(this.sumFlow);
    }

    // deserialization method
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        // the read order must match the write order exactly
        this.upFlow = dataInput.readLong();
        this.downFlow = dataInput.readLong();
        this.sumFlow = dataInput.readLong();
    }

    public long getUpFlow() {
        return upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    public void set(long u, long d) {
        this.upFlow = u;
        this.downFlow = d;
        this.sumFlow = u + d;
    }
}
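
A minimal round-trip sketch to check that write() and readFields() are symmetric (a standalone snippet for local experimentation, not part of the job; it uses only java.io stream classes):

package com.mapreduce;

import java.io.*;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean in = new FlowBean(100, 200);

        // serialize: write() appends the three longs to a byte stream
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        in.write(new DataOutputStream(baos));

        // deserialize: readFields() consumes the bytes in the same order
        FlowBean out = new FlowBean();
        out.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));

        System.out.println(out); // prints: 100	200	300
    }
}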

3. Mapper

package com.mapreduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author Tancy
 * @date 2021/10/26
 * @time 18:32
 **/
public class FlowMapper extends Mapper<LongWritable, Text,Text, FlowBean> {
    FlowBean v = new FlowBean();
    Text k = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(" ");
        k.set(fields[1]);
        v.set(Long.parseLong(fields[fields.length - 3]), Long.parseLong(fields[fields.length - 2]));
        context.write(k,v);
    }
}
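
Why index from the end? The domain-name field is optional in this log (compare rows 2 and 5 of the sample input below), so counting from the left would break on rows without it; the last three fields (upstream, downstream, status code) are always present. For the row

1 13736230513 192.196.100.1 nefu.edu.cn 2481 24681 200

fields[1] is 13736230513 (the key), fields[fields.length - 3] is 2481 (upstream), and fields[fields.length - 2] is 24681 (downstream).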

4. FlowReducer

package com.mapreduce;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @author Tancy
 * @date 2021/10/26
 * @time 18:44
 **/

public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    FlowBean v = new FlowBean();
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // 1. Accumulate the up/down totals for this phone number
        long totalUpFlow = 0;
        long totalDownFlow = 0;
        for (FlowBean flowBean : values) {
            totalUpFlow += flowBean.getUpFlow();
            totalDownFlow += flowBean.getDownFlow();
        }
        // 2. Write out the totals
        v.set(totalUpFlow, totalDownFlow);
        context.write(key,v);
    }
}

5. FlowDriver

package com.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @author Tancy
 * @date 2021/10/21
 * @time 14:12
 **/
public class FlowDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Set up the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        //2. set jar path
        job.setJarByClass(FlowDriver.class); // lets Hadoop locate the jar containing this class

        //3. connect Mapper and Reducer
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);

        //4. set type of Mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        //5.set type of final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        //6. set input path
        FileInputFormat.setInputPaths(job,new Path(args[0]));

        //7. set output path
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        //8. submit
        boolean res = job.waitForCompletion(true);
        System.exit(res?0:1);
    }
}
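
Submission mirrors the WordCount job (jar name and paths illustrative):

hadoop jar flowcount-1.0-SNAPSHOT.jar com.mapreduce.FlowDriver /user/flow/input /user/flow/output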

6. Input and Output

Input

1 13736230513 192.196.100.1 nefu.edu.cn 2481 24681 200
2 13846544121 192.196.100.2 264 0 200
3 13956435636 192.196.100.3 132 1512 200
4 13966251146 192.168.100.1 240 0 404
5 18271575951 192.168.100.2 nefu.edu.cn 1527 2106 200
6 84188413 192.168.100.3 nefu.edu.cn 4116 1432 200
7 13590439668 192.168.100.4 1116 954 200
8 15910133277 192.168.100.5 www.hao123.com 3156 2936 200
9 13729199489 192.168.100.6 240 0 200
10 13630577991 192.168.100.7 www.shouhu.com 3156 2936 200
11 15043685818 192.168.100.8 www.baidu.com 3659 3538 200
12 15959002129 192.168.100.9 nefu.edu.cn 1938 180 500
13 13560439638 192.168.100.10 918 4938 200
14 13470253144 192.168.100.11 180 180 200
15 13682846555 192.168.100.12 www.qq.com 1938 2910 200
16 13992314666 192.168.100.13 www.163.com 3008 3720 200
17 13509468723 192.168.100.14 www.taobao.com 7335 110349 404
18 18390173782 192.168.100.15 www.sogou.com 9531 2412 200
19 13975057813 192.168.100.16 www.baidu.com 11058 48243 200
20 13768778790 192.168.100.17 120 120 200
21 13568436656 192.168.100.18 www.alibaba.com 2481 24681 200
22 13568436656 192.168.100.19 1116 954 200

Output
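The job emits one row per phone number, with the value formatted by FlowBean.toString() as upstream<TAB>downstream<TAB>total. For instance, 13568436656 appears in rows 21 and 22 of the input, so its output row should read:

13568436656	3597	25635	29232

(2481 + 1116 = 3597 up, 24681 + 954 = 25635 down, 29232 in total.)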
