Custom Partitioning Across Multiple Files in Hadoop

Test data

file1:

2 3 4 12 1212 121 23 232
45 545 7667 323 5454 7676 2323
655 12 1212 121
23 232 45 545 7667 323 5454
7676 2323 655
43333 334 34 22222 2222 33333
121 232 4 545 65 87 454 234
121 232 4 545 65 87 454 234
121 232 4 545 65 87 454 234

file2:

2 3 4 12 1212 121 23 232
45 545 7667 323 5454 7676 2323
655 12 1212 121
23 232 45 545 7667 323 5454
7676 2323 655
43333 334 34 22222 2222 33333
121 232 4 545 65 87 454 234
121 232 4 545 65 87 454 234
121 232 4 545 65 87 454 234

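The two files contain the same whitespace-separated numbers. The job below counts how many times each number appears across both files, and a custom Partitioner routes every number to a reducer according to its digit count, so that numbers of the same length land in the same output file.
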
Custom Mapper

package com.test.sort;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class SortMapper extends Mapper<LongWritable,Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();     // read one line of input
        String[] fields = line.split(" ");  // split the line into tokens
        for (String field : fields) {
            // emit each token as a (token, 1) key-value pair
            context.write(new Text(field), new IntWritable(1));
        }
    }
}
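
For every input line, the mapper splits on single spaces and emits one (token, 1) pair per token; for example, the line 2 3 4 produces (2, 1), (3, 1) and (4, 1).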

Custom Partitioner

package com.test.sort;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class SortPartition extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text text, IntWritable value, int numPartitions) {
        int length = text.toString().length(); // partition by the number of digits in the token
        int partition = 4; // tokens with five or more digits fall through to the last partition
        switch (length) {
            case 1:
                partition = 0; // partition numbers must start at 0
                break;
            case 2:
                partition = 1;
                break;
            case 3:
                partition = 2;
                break;
            case 4:
                partition = 3;
                break;
            default:
                break;
        }
        return partition;
    }
}
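
To check how tokens are routed, here is a minimal standalone sketch (not part of the original job; the class name SortPartitionDemo is invented for illustration) that calls getPartition directly with numPartitions = 5, matching the driver's setNumReduceTasks(5):

package com.test.sort;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
public class SortPartitionDemo {
    public static void main(String[] args) {
        SortPartition partitioner = new SortPartition();
        // one sample token per length bucket, taken from the test data above
        String[] samples = {"2", "45", "655", "7667", "43333"};
        for (String s : samples) {
            int p = partitioner.getPartition(new Text(s), new IntWritable(1), 5);
            System.out.println(s + " -> partition " + p);
        }
        // prints: 2 -> 0, 45 -> 1, 655 -> 2, 7667 -> 3, 43333 -> 4
    }
}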

Custom Reducer

package com.test.sort;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class SortReduce extends Reducer<Text, IntWritable ,Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get(); // accumulate the 1s emitted by the mapper
        }
        context.write(key, new IntWritable(sum)); // (token, total count)
    }
}
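
Because the partitioner buckets keys by length before the shuffle, each reducer only ever sees tokens of one length; it simply sums the 1s for each distinct token to get its total count across both input files.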

Custom Driver

package com.test.sort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class SortDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // hardcoded local paths and a local runner so the job can be tested without a cluster
        args = new String[2];
        args[0] = "src/main/resources/input";
        args[1] = "src/main/resources/output";
        Configuration cfg = new Configuration();
        cfg.set("mapreduce.framework.name", "local"); // run MapReduce in-process
        cfg.set("fs.defaultFS", "file:///");          // use the local file system
        final FileSystem filesystem = FileSystem.get(cfg);
        // delete the output directory if it already exists, otherwise the job fails
        if (filesystem.exists(new Path(args[1]))) {
            filesystem.delete(new Path(args[1]), true);
        }
        Job job = Job.getInstance(cfg);
        job.setJarByClass(SortDriver.class);
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setPartitionerClass(SortPartition.class);
        job.setNumReduceTasks(5); // must cover partitions 0-4 produced by SortPartition
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));    // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output path
        // submit the job and exit with its status
        int ec = job.waitForCompletion(true) ? 0 : 1;
        System.exit(ec);
    }
}
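
With five reduce tasks, the job writes five output files, part-r-00000 through part-r-00004. Given the partitioner above, part-r-00000 holds the counts for one-digit tokens, part-r-00001 for two-digit tokens, and so on; the five-digit tokens 43333, 22222 and 33333 land in part-r-00004.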