<hadoop> Partitioning output in a MapReduce program

Sometimes the processed data needs to be stored by category. This requires a custom Partitioner.

How a MapReduce job runs (a sketch of the default partitioner follows this list):
1. Hadoop reads the input file and passes each record to the map method;
2. the map method produces KV pairs and sends them to the partitioner;
3. the partitioner assigns each pair to one of the Reduce tasks;
4. each reduce method processes the KV pairs it receives;
5. Hadoop writes the output of each Reduce task to a separate file.
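By default, Hadoop assigns partitions with HashPartitioner, which hashes the key modulo the number of reduce tasks. Its core logic is essentially the following (paraphrased from Hadoop's source):

import org.apache.hadoop.mapreduce.Partitioner;

public class HashPartitioner<K, V> extends Partitioner<K, V> {
    @Override
    public int getPartition(K key, V value, int numReduceTasks) {
        // Mask the sign bit so a negative hashCode cannot produce a negative partition
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}

Overriding this behavior lets us decide exactly which output file each key lands in.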

The main function

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * args must contain two parameters: 1. the input path; 2. the output path
 * Created by hadoop on 17-2-18.
 */
public class JobSubmitter {
    public static void main(String[] args) throws Exception {

        if (args.length < 2) {
            System.err.println("Usage: JobSubmitter <input path> <output path>");
            System.exit(1);  // exit instead of continuing with missing arguments
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(JobSubmitter.class);

        job.setMapperClass(ProvinceFlowCountMapper.class);
        job.setReducerClass(ProvinceFlowCountReducer.class);

        // When the map output types are the same as the final (reduce) output types,
        // the following two lines can be omitted:
//        job.setMapOutputKeyClass(Text.class);
//        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Replace the default HashPartitioner with our custom partitioner
        job.setPartitionerClass(ProvincePartitioner.class);

        // The number of reduce tasks must match the partition numbers produced by
        // the Partitioner: ProvincePartitioner returns 0 or 1, so we need 2 tasks.
        // - More reduce tasks than partitions: the extra tasks produce empty files.
        // - Fewer reduce tasks than partitions: the job fails with an illegal-partition error.
        // - Exactly 1 reduce task: no error, but all data goes to a single reducer
        //   (i.e. no partitioning takes place).
        job.setNumReduceTasks(2);

        // Taking the paths from the command line keeps the job flexible
        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);  // propagate success/failure
    }
}
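Assuming the classes are packaged into a jar (the jar name and HDFS paths below are hypothetical), the job can be submitted with:

hadoop jar flowcount.jar JobSubmitter /input/flow /output/flow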

The Mapper class

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Counts mobile phone traffic.
 * Input record format (tab-separated): id(int)    phone(varchar)    flow(int)
 * Created by hadoop on 17-2-18.
 */
public class ProvinceFlowCountMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
    // Reuse Writable instances across map() calls to reduce garbage collection
    private final Text phone = new Text();
    private final IntWritable flow = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split("\t");
        phone.set(fields[1]);                       // second column: phone number
        flow.set(Integer.parseInt(fields[2]));      // third column: flow
        // Allocating new Writables on every call would be less efficient:
        // IntWritable flow = new IntWritable(Integer.parseInt(fields[2]));
        context.write(phone, flow);
    }
    }
}
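For example, given a (hypothetical) tab-separated input line

1	13726230503	2481

the mapper emits the pair ("13726230503", 2481).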

The Reducer class

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Sums the flow values collected for each phone number.
 * Created by hadoop on 17-2-18.
 */
public class ProvinceFlowCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum all flow values received for this phone number
        int flowSum = 0;
        for (IntWritable value : values) {
            flowSum += value.get();
        }
        context.write(key,new IntWritable(flowSum));
    }
}
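Continuing the (hypothetical) example, if the reducer receives ("13726230503", [2481, 2481]), it writes the tab-separated line:

13726230503	4962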

The Partitioner class

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Classifies the KV pairs emitted by the map function; called once per pair.
 * Created by hadoop on 17-2-18.
 */
public class ProvincePartitioner extends Partitioner<Text,IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        String phone = key.toString();
        // Partition by the last digit of the phone number:
        // even digit -> partition 0, odd digit -> partition 1
        int lastDigit = Integer.parseInt(phone.substring(phone.length() - 1));
        return lastDigit % 2;
    }
}
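A quick local sanity check of the partitioner (a hypothetical test class, not part of the submitted job):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class ProvincePartitionerTest {
    public static void main(String[] args) {
        ProvincePartitioner p = new ProvincePartitioner();
        // phone ending in 3 (odd)  -> prints 1
        System.out.println(p.getPartition(new Text("13726230503"), new IntWritable(2481), 2));
        // phone ending in 0 (even) -> prints 0
        System.out.println(p.getPartition(new Text("13826544100"), new IntWritable(120), 2));
    }
}

With two reduce tasks, the job produces two output files (part-r-00000 and part-r-00001 by Hadoop's naming convention): one for phone numbers ending in an even digit, one for those ending in an odd digit.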