Sometimes the processed data needs to be stored in separate categories, which calls for a custom Partitioner.
How a MapReduce job runs:
1. Hadoop reads the input file and passes each record to the map method;
2. the map method emits key-value (KV) pairs, which are handed to the partitioner;
3. the partitioner assigns each pair to a Reduce task;
4. each reduce method processes the KV pairs it receives;
5. Hadoop writes the output of each Reduce task to a separate file.
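As a concrete illustration (the three records below are invented sample data, laid out in the tab-separated ID / phone / flow format the Mapper below expects), the Partitioner in this example routes each phone number by the parity of its last digit, so with two reduce tasks the summed results land in two files:

1	13726230501	2481    -> last digit 1 -> partition 1 -> part-r-00001
2	13726230502	1116    -> last digit 2 -> partition 0 -> part-r-00000
3	13726230501	528     -> last digit 1 -> partition 1 -> part-r-00001 (the Reducer sums this with the first record to 3009)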
Main class (driver)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * The main method expects two arguments: 1. the input path; 2. the output path.
 * Created by hadoop on 17-2-18.
 */
public class JobSubmitter {
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Invalid arguments: <input path> <output path> required");
            return;
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(JobSubmitter.class);
        job.setMapperClass(ProvinceFlowCountMapper.class);
        job.setReducerClass(ProvinceFlowCountReducer.class);
        // When the map output types match the final (reduce) output types,
        // the following two lines can be omitted.
        // job.setMapOutputKeyClass(Text.class);
        // job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Register the custom partitioner, replacing the default HashPartitioner.
        job.setPartitionerClass(ProvincePartitioner.class);
        // The number of reduce tasks must match the number of partitions the
        // partitioner produces (ProvincePartitioner returns 0 or 1).
        // With more reduce tasks than partitions, the extra tasks write empty files.
        // With fewer reduce tasks than partitions, the job fails. The only exception
        // is a single reduce task: it receives all the data, so no partitioning takes place.
        job.setNumReduceTasks(2);
        // Taking the paths from the command line keeps the job flexible.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
}
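A typical way to submit the job (the jar name and HDFS paths below are placeholders, not from the original post) is to package the three classes into a jar and pass the two paths the driver expects:

hadoop jar flowcount.jar JobSubmitter /flow/input /flow/output

Because setNumReduceTasks(2) is used, the output directory will contain one file per reduce task: part-r-00000 and part-r-00001.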
Mapper class
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Counts mobile traffic per phone number.
 * Input record layout (tab-separated): ID(int)  phone number(varchar)  flow(int)
 * Created by hadoop on 17-2-18.
 */
public class ProvinceFlowCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reuse these Writable objects across map() calls to cut down on garbage collection.
    private Text phone = new Text();
    private IntWritable flow = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split("\t");
        phone.set(fields[1]);
        flow.set(Integer.parseInt(fields[2]));
        // Allocating a new object on every call, as below, is less efficient:
        // IntWritable flow = new IntWritable(Integer.parseInt(fields[2]));
        context.write(phone, flow);
    }
}
Reducer class
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Sums the flow values collected for each phone number.
 * Created by hadoop on 17-2-18.
 */
public class ProvinceFlowCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int flowSum = 0;
        for (IntWritable value : values) {
            flowSum += value.get();
        }
        context.write(key, new IntWritable(flowSum));
    }
}
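Since the reduce logic is a plain sum, the same class could also be registered as a combiner in the driver to shrink the data shuffled between map and reduce. This is an optional optimization that is not part of the original post:

job.setCombinerClass(ProvinceFlowCountReducer.class);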
Partitioner class
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Assigns a partition to each KV pair emitted by the map function; called once per pair.
 * Created by hadoop on 17-2-18.
 */
public class ProvincePartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text text, IntWritable intWritable, int numPartitions) {
        String phone = text.toString();
        // Route by the parity of the phone number's last digit: returns 0 or 1.
        String lastDigit = phone.substring(phone.length() - 1);
        return Integer.parseInt(lastDigit) % 2;
    }
}
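The parity-of-last-digit rule above is only a stand-in for a real routing rule. If the goal implied by the class name is to partition by province, one common approach is to look up the phone-number prefix in a static table. The sketch below is my own illustration, not part of the original code, and the prefix-to-partition mapping is invented:

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class PrefixProvincePartitioner extends Partitioner<Text, IntWritable> {
    // Hypothetical prefix-to-partition table; a real one would be loaded from a lookup file.
    private static final Map<String, Integer> PROVINCE_MAP = new HashMap<>();
    static {
        PROVINCE_MAP.put("137", 0);
        PROVINCE_MAP.put("138", 1);
        PROVINCE_MAP.put("139", 2);
    }

    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        String prefix = key.toString().substring(0, 3);
        // Unknown prefixes fall back to a catch-all partition 3.
        Integer partition = PROVINCE_MAP.get(prefix);
        return partition == null ? 3 : partition;
    }
}

As with the original class, the number set by job.setNumReduceTasks() in the driver has to match the number of partitions returned here (4 in this sketch).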