Hadoop的MapReduce实例--手机上下行流量

最新推荐文章于 2023-02-06 21:54:19 发布

尘世壹俗人

最新推荐文章于 2023-02-06 21:54:19 发布

阅读量332

点赞数

分类专栏：大数据Hadoop技术文章标签： hadoop

本文链接：https://blog.csdn.net/dudadudadd/article/details/111867084

版权

大数据Hadoop技术专栏收录该内容

26 篇文章 0 订阅

订阅专栏

pom依赖如下

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the MapReduce example: declares JUnit, Log4j 2 and the Hadoop 2.7.2 client libraries. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

	<!-- Replace this section (coordinates, packaging, version) with your own project's values. -->
	<!-- NOTE(review): "pom" packaging is normally used for parent/aggregator projects; a module that should
	     be built into a runnable job jar presumably needs "jar" packaging - confirm for your project. -->
    <groupId>com.wy</groupId>
    <artifactId>FOBJ</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>
    
    <dependencies>
        <!-- Unit testing. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>RELEASE</version>
        </dependency>
        <!-- Logging implementation. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.8.2</version>
        </dependency>
        <!-- Hadoop libraries; all three are pinned to the same version (2.7.2). -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.2</version>
        </dependency>
        
		<!-- This dependency is optional. If you keep it, change the path below to your own JDK installation
		     directory. The author only added it because the JDK on a previous machine was broken. -->
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <!-- Machine-specific absolute path: the build fails on hosts where this file does not exist. -->
            <systemPath>C:/Program Files/Java/jdk1.8.0_211/lib/tools.jar</systemPath>
        </dependency>
    </dependencies>
</project>

下面给大家演示一个简单的计算手机号码上下行流量的MapReduce

首先准备一个实体类的Bean

package com.wy;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * MapReduce key that carries the traffic figures of one phone number.
 *
 * <p>The class implements {@link WritableComparable} (parameterized with itself) rather than
 * the plain {@code Writable} interface: it is used as a key, keys are sorted during the
 * shuffle, and {@code Writable} alone offers no comparison method.
 *
 * <p>Ordering is by total flow, largest first; beans with the same total are ordered by phone
 * number so that different phone numbers never compare as equal. {@link #equals(Object)} and
 * {@link #hashCode()} use the same two fields, which keeps hash-based partitioning stable
 * across JVMs and consistent with the sort order.
 */
public class TeleptoneBean implements WritableComparable<TeleptoneBean> {
    private String telephone; // phone number
    private long upFlow;      // upstream traffic
    private long downFlow;    // downstream traffic
    private long sumFlow;     // total traffic (upFlow + downFlow)

    /**
     * No-argument constructor. Instances are expected to be populated afterwards, either
     * through {@link #readFields(DataInput)} or through the setters.
     */
    public TeleptoneBean() {
    }

    /**
     * Sets all fields in one call (this is a setter, not a constructor, so one instance can
     * be reused for many records).
     *
     * @param telephone phone number
     * @param upFlow    upstream traffic
     * @param downFlow  downstream traffic; the total is derived as {@code upFlow + downFlow}
     */
    public void setTeleptoneBean(String telephone, long upFlow, long downFlow) {
        this.telephone = telephone;
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    /**
     * Serializes the bean. The field order here must match {@link #readFields(DataInput)}.
     *
     * @param out sink to write to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // The phone number must have been set before serialization; writeUTF does not accept null.
        out.writeUTF(this.telephone);
        out.writeLong(this.upFlow);
        out.writeLong(this.downFlow);
        out.writeLong(this.sumFlow);
    }

    /**
     * Deserializes the bean, reading the fields in exactly the order {@link #write(DataOutput)}
     * emitted them.
     *
     * @param in source to read from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.telephone = in.readUTF();
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    /**
     * Returns the tab-separated line used in the job output:
     * {@code telephone<TAB>upFlow<TAB>downFlow<TAB>sumFlow}.
     */
    @Override
    public String toString() {
        return telephone + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    // Plain accessors. Note that setUpFlow/setDownFlow do not recompute sumFlow.

    public String getTelephone() {
        return telephone;
    }

    public void setTelephone(String telephone) {
        this.telephone = telephone;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    /**
     * Sort order used by MapReduce: descending by total flow, then ascending by phone number.
     *
     * <p>The phone-number tie-break matters because keys that compare as equal are handed to
     * the reducer as one group; without it, different phone numbers with the same total
     * would be merged into a single group.
     *
     * @param o bean to compare with
     * @return negative, zero or positive as this bean sorts before, with or after {@code o}
     */
    @Override
    public int compareTo(TeleptoneBean o) {
        // Arguments are swapped on purpose: larger totals sort first.
        int bySum = Long.compare(o.getSumFlow(), this.sumFlow);
        if (bySum != 0) {
            return bySum;
        }
        return compareNullable(this.telephone, o.getTelephone());
    }

    /** Compares two possibly-null strings; {@code null} sorts before any non-null value. */
    private static int compareNullable(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }

    /**
     * Two beans are equal when they have the same total flow and phone number, i.e. exactly
     * when {@link #compareTo(TeleptoneBean)} returns zero.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TeleptoneBean)) {
            return false;
        }
        return compareTo((TeleptoneBean) obj) == 0;
    }

    /**
     * Content-based hash over the same fields as {@link #equals(Object)}, so equal keys hash
     * identically in every JVM (the inherited identity hash would not).
     */
    @Override
    public int hashCode() {
        int phoneHash = telephone == null ? 0 : telephone.hashCode();
        return 31 * phoneHash + Long.hashCode(sumFlow);
    }
}

最后Driver类如下

package com.wy;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the phone-traffic MapReduce job. It holds the mapper and reducer as static
 * nested classes and configures and submits the job from {@link #main(String[])}.
 */
public class TeleptoneDrvier {

    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "D:\\a\\inputphone";

    /** Output path used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\a\\outputphone";

    /**
     * Mapper: extracts phone number, upstream and downstream traffic from each input line
     * and emits them as a {@link TeleptoneBean} key with an empty value.
     *
     * <p>Expected input is tab-separated, for example:
     * <pre>
     * id   tel         ip             url             up       down    status
     * 1	13736230513	192.196.100.1	www.atguigu.com	2481	24681	200
     * </pre>
     */
    public static class TeleptoneMapper extends Mapper<LongWritable, Text, TeleptoneBean, NullWritable> {

        /** Number of tab-separated columns a well-formed record has. */
        private static final int FIELD_COUNT = 7;
        /** Column holding the phone number. */
        private static final int TELEPHONE_INDEX = 1;
        /** Column holding the upstream traffic (third from the end). */
        private static final int UP_FLOW_INDEX = FIELD_COUNT - 3;
        /** Column holding the downstream traffic (second from the end). */
        private static final int DOWN_FLOW_INDEX = FIELD_COUNT - 2;

        /**
         * Digits only, with at least one non-zero digit. Compiled once instead of on every
         * record. NOTE(review): this rejects a flow of "0"; confirm that is intended.
         */
        private static final java.util.regex.Pattern FLOW_FORMAT =
                java.util.regex.Pattern.compile("^[0-9]*[1-9][0-9]*$");

        // One bean reused for every record, to avoid allocating a new object per map() call.
        private final TeleptoneBean teleptoneBean = new TeleptoneBean();

        /**
         * Hook that runs before the first map() call; a place for custom preparation such as
         * looking up the name of the input source. Nothing is needed for this job.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {

        }

        /**
         * Cleans one input line and, if it is well-formed, emits its traffic figures.
         *
         * <p>A record is dropped silently when it does not have exactly seven columns, when
         * either flow column fails the format check, or when a flow value does not fit in a
         * {@code long}.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one line of input text
         * @param context sink for the emitted key/value pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length != FIELD_COUNT) {
                return;
            }

            String upText = fields[UP_FLOW_INDEX];
            String downText = fields[DOWN_FLOW_INDEX];
            if (!FLOW_FORMAT.matcher(upText).matches() || !FLOW_FORMAT.matcher(downText).matches()) {
                return;
            }

            long upFlow;
            long downFlow;
            try {
                upFlow = Long.parseLong(upText);
                downFlow = Long.parseLong(downText);
            } catch (NumberFormatException ignored) {
                // All digits but too large for a long: treat as dirty data and skip the
                // record instead of failing the whole task.
                return;
            }

            teleptoneBean.setTeleptoneBean(fields[TELEPHONE_INDEX], upFlow, downFlow);
            context.write(teleptoneBean, NullWritable.get());
        }

        /**
         * Counterpart of setup(): runs after the last map() call. Only delegates to the
         * parent implementation.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            super.cleanup(context);
        }
    }


    /**
     * Reducer: adds up the upstream and downstream traffic of every record in a group and
     * emits one bean per group. A group consists of the keys that the shuffle treats as
     * equal, and groups arrive in the sort order of the key.
     */
    public static class TeleptoneReduce extends Reducer<TeleptoneBean, NullWritable, TeleptoneBean, NullWritable> {

        // One output bean reused for every group.
        private final TeleptoneBean teleptoneBean = new TeleptoneBean();

        /** Runs before the first reduce() call; only delegates to the parent implementation. */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
        }

        /**
         * Sums the flows of all records in the group and writes the totals.
         *
         * @param key     key object of the group
         * @param values  one (empty) value per record in the group
         * @param context sink for the emitted key/value pair
         */
        @Override
        protected void reduce(TeleptoneBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            long sumUp = 0;
            long sumDown = 0;
            // Important: the values carry no data, but each one is paired with a key.
            // Advancing the iterator makes the key object take on the contents of the
            // record currently being visited, so the key must be read inside the loop.
            for (NullWritable ignored : values) {
                sumUp += key.getUpFlow();
                sumDown += key.getDownFlow();
            }
            teleptoneBean.setTeleptoneBean(key.getTelephone(), sumUp, sumDown);
            context.write(teleptoneBean, NullWritable.get());
        }

        /** Runs after the last reduce() call; only delegates to the parent implementation. */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            super.cleanup(context);
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and -1 on failure.
     *
     * @param args optional paths: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; any that are missing fall back to the built-in defaults
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a configured class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // Default configuration; nothing is overridden here.
        Configuration cfg = new Configuration();
        Job job = Job.getInstance(cfg);
        // Tells Hadoop which jar to ship by pointing at a class contained in it.
        job.setJarByClass(TeleptoneDrvier.class);

        // Final (reduce) output types.
        job.setOutputKeyClass(TeleptoneBean.class);
        job.setOutputValueClass(NullWritable.class);
        // Intermediate (map) output types.
        job.setMapOutputKeyClass(TeleptoneBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(TeleptoneMapper.class);
        job.setReducerClass(TeleptoneReduce.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Single-job pattern: block until the job finishes and map the result to an exit code.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : -1);
    }
}

最后我们直接运行就可以了，大家也可以打成jar包，在服务器上用hadoop jar命令提交任务；不过记得输入、输出路径要通过main方法的参数（args）传入，而不要使用代码里写死的本地路径哦

尘世壹俗人

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Hadoop的MapReduce实例--手机上下行流量

pom依赖如下<?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org
复制链接

扫一扫