MapReduce Algorithm 2: Data Deduplication (HashSet)

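The job below counts, for each province, how many distinct product kinds appear in the input. The mapper emits (province, kind) pairs, and the reducer folds each province's kinds into a HashSet; because a set silently drops duplicates, its final size is the number of distinct kinds, which is written out as the result.
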
package MRDemo;

import java.io.IOException;
import java.util.HashSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ProductKind {

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: ProductKind <input path> <output path>");
            System.exit(1);
        }
        Job job = Job.getInstance(new Configuration(), "ProductKind");
        job.setJarByClass(ProductKind.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(ProductCountMap.class);
        // A combiner could deduplicate map-side as well; see the sketch after the listing.
        job.setReducerClass(ProductCountReduce.class);

        // Map output is (province, kind); final output is (province, distinct-kind count).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class ProductCountMap extends Mapper<LongWritable, Text, Text, Text> {
        private final Text province = new Text();
        private final Text kind = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Expect six tab-separated fields: product kind in field 0, province in field 4.
            String[] fields = value.toString().split("\t");
            if (fields.length == 6) {
                kind.set(fields[0].trim());
                province.set(fields[4].trim());
                context.write(province, kind);
            }
        }
    }
    public static class ProductCountReduce extends Reducer<Text, Text, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Collect this province's kinds into a HashSet, which discards duplicates.
            HashSet<String> kinds = new HashSet<String>();
            for (Text value : values) {
                kinds.add(value.toString());
            }
            if (!kinds.isEmpty()) {
                context.write(key, new IntWritable(kinds.size()));
            }
        }
    }

}
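
To run the job, package it and pass input and output paths, e.g. hadoop jar MRDemo.jar MRDemo.ProductKind /data/products /data/product_kinds (the jar name and paths are placeholders). With a hypothetical input record such as

shoes	1001	200	50	Guangdong	2015

the mapper emits (Guangdong, shoes), and the output for a province is a line like Guangdong<TAB>37 if 37 distinct kinds were seen. One caveat of this pattern: the reducer materializes every distinct kind for a province in memory, which is fine for modest cardinality but can strain the heap when one key has millions of distinct values; a secondary-sort design that streams sorted values would avoid holding the set.

As noted in main, a combiner can also help here: deduplication is idempotent, so forwarding each distinct (province, kind) pair once from the map side cuts shuffle volume without changing the result. A minimal sketch, assuming the same (Text, Text) map output types; ProductCountCombine is a name invented for illustration:

    public static class ProductCountCombine extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Forward each distinct kind once; the reducer still receives
            // (province, kind) pairs, just without map-local duplicates.
            HashSet<String> seen = new HashSet<String>();
            for (Text value : values) {
                if (seen.add(value.toString())) {
                    context.write(key, new Text(value));
                }
            }
        }
    }

It would be registered in main with job.setCombinerClass(ProductCountCombine.class);.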