MapReduce统计流量案例(自定义序列化类)基本实现

输入数据(以\t间隔)

   id    手机号          IP地址      访问域名(有的有有的无)  上行流量 下行流量 状态码

1	11111111111	120.196.100.99	100	900	200
2	11111111112	120.196.100.92	www.baidu.com	200	200	200
3	11111111113	120.196.100.93	800	200	200
4	11111111114	120.196.100.95	30	970	200
5	11111111116	120.196.100.95	www.baidu.com	105	895	200
6	11111111115	120.196.100.99	100	900	200
7	11111111112	120.196.100.59	www.baidu.com	300	300	200
8	11111111118	120.196.100.96	150	850	200

Maven必须配置

注意:Windows本地运行需要确定本地有Hadoop依赖并确保和Pom配置文件中版本一致,FlowDriver中第6点输入输出路径需要自行修改

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.test2</groupId>
    <artifactId>mapredceDemo1</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- Hadoop 本地客户端 -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.0.1</version>
        </dependency>
        <!-- 测试相关组件 -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- 日志打印组件 -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- maven 编译插件(指定 JDK 1.8 编译版本) -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!-- maven 打包插件-附带依赖打包 -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>

resources目录下log4j.properties 配置

log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

自定义Writable类实现(FlowBean)

package com.test.mapreduce.writable;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * 1. 必须实现Writable接口
 * 2. 创建空参构造方法和各个参数的get和set
 * 3. 重写序列化方法
 * 4. 重写反序列化方法
 * 5. 注意序列化和反序列化顺序完全一致
 * 6. 要想把结果显示在文件中,需要重写toString()
 */
/**
 * Custom Hadoop Writable carrying the up/down/total traffic for one phone number.
 *
 * Implementation checklist:
 * 1. Must implement the {@code Writable} interface.
 * 2. Must expose a no-arg constructor (the framework instantiates it via reflection)
 *    plus getters/setters for each field.
 * 3. {@code write()} and {@code readFields()} must (de)serialize the fields in
 *    exactly the same order.
 * 4. {@code toString()} controls how the value is rendered in the output files.
 */
public class FlowBean implements Writable {

    // Primitive long instead of boxed Long: a fresh bean defaults to 0 rather
    // than null, so write()/setSumFlow() can never throw NullPointerException,
    // and the hot (de)serialization path avoids autoboxing.
    private long upFlow;   // upstream traffic
    private long downFlow; // downstream traffic
    private long sumFlow;  // total traffic = upFlow + downFlow

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    /** Derives the total from the current up/down values; call after setting both. */
    public void setSumFlow() {
        this.sumFlow = this.upFlow + this.downFlow;
    }

    /**
     * No-arg constructor required by the Hadoop serialization framework.
     */
    public FlowBean() {
    }

    /**
     * Serializes the three counters; order must match {@link #readFields}.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(upFlow);
        dataOutput.writeLong(downFlow);
        dataOutput.writeLong(sumFlow);
    }

    /**
     * Deserializes the three counters in the same order they were written.
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readLong();
        this.downFlow = dataInput.readLong();
        this.sumFlow = dataInput.readLong();
    }

    /**
     * Tab-separated rendering used when the reducer writes results to file.
     */
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }
}

自定义Mapper类实现(FlowMapper)

package com.test.mapreduce.writable;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper: emits (phone number, FlowBean) for every input record.
 *
 * Input is one tab-separated line per record; the domain column is optional,
 * so the traffic columns are addressed from the END of the split array.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // Output objects are reused across map() calls to avoid per-record allocation.
    private final Text outKey = new Text();
    private final FlowBean outValue = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the tab-separated record.
        String[] fields = value.toString().split("\t");

        // fields[1] is the phone number; up/down flow sit 3rd/2nd from the end
        // because the optional domain column shifts the middle of the record.
        outKey.set(fields[1]);
        outValue.setUpFlow(Long.parseLong(fields[fields.length - 3]));
        outValue.setDownFlow(Long.parseLong(fields[fields.length - 2]));
        outValue.setSumFlow();

        context.write(outKey, outValue);
    }
}

自定义Reducer类实现(FlowReducer)

package com.test.mapreduce.writable;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer: sums the up/down traffic of all records that share one phone number
 * and emits a single aggregated FlowBean per key.
 */
public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // Reused output value object — avoids allocating a bean per key group.
    private final FlowBean outValue = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Fresh accumulators for each key: one phone number may appear in
        // several input records.
        long upTotal = 0L;
        long downTotal = 0L;

        // Sum every record that arrived for this phone number.
        for (FlowBean bean : values) {
            upTotal += bean.getUpFlow();
            downTotal += bean.getDownFlow();
        }

        // Populate the reused bean and emit the aggregated result.
        outValue.setUpFlow(upTotal);
        outValue.setDownFlow(downTotal);
        outValue.setSumFlow();

        context.write(key, outValue);
    }
}

自定义Driver类实现(FlowDriver)

package com.test.mapreduce.writable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FlowDriver {
    /**
     * Job driver: wires FlowMapper/FlowReducer together and submits the job.
     *
     * Generalized to accept the paths from the command line
     * (args[0] = input dir, args[1] = output dir). When arguments are absent
     * it falls back to the original hard-coded local Windows paths, so
     * existing usage is unchanged. The output directory must not exist yet —
     * Hadoop fails the job if it does.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Resolve paths first so they can be overridden without recompiling.
        String inputDir  = args.length > 0 ? args[0] : "D:\\input";
        String outputDir = args.length > 1 ? args[1] : "D:\\output";

        // 1. Build the configuration and obtain the Job instance.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Jar containing this driver (needed when running on a cluster).
        job.setJarByClass(FlowDriver.class);

        // 3. Mapper and Reducer classes.
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);

        // 4. Map-side output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // 5. Final (reducer) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // 6. Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        // 7. Submit the job, wait for completion, and mirror success in the exit code.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}

输出数据

  手机号   总上行流量  总下行流量  总流量

11111111111	100	900	1000
11111111112	500	500	1000
11111111113	800	200	1000
11111111114	30	970	1000
11111111115	100	900	1000
11111111116	105	895	1000
11111111118	150	850	1000

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值