package mrpro927;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
* Requirement: data deduplication. Rely on the sorting and grouping of the map output key
* during the shuffle; the reduce side only needs to write each grouped key once, so no
* real reduce-side logic is required.
*
*/
public class phoneDataQuChong {
    // Mapper: emit the whole input line as the key with a NullWritable value,
    // so identical lines land in the same group after the shuffle.
    public static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    // NOTE: the original snippet is cut off after "public s"; the reducer and
    // driver below are a reconstruction based on the class's imports and the
    // standard deduplication pattern. Duplicates of a line arrive grouped under
    // one key, so the reducer writes each key exactly once.
    public static class MyReducer extends Reducer<Text, NullWritable, Text, NullWritable>{
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values,
                Reducer<Text, NullWritable, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    // Driver: run as "hadoop jar <your-jar> mrpro927.phoneDataQuChong <in> <out>".
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "phoneDataQuChong");
        job.setJarByClass(phoneDataQuChong.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
A simple example of data deduplication with MapReduce.
Using the MapReduce framework, make the target field the map output key and emit it in the map stage; the reduce stage then naturally collapses duplicates, which gives a simple and effective way to handle repeated records.
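For a concrete illustration with made-up records: if the input file contains

13812345678 beijing
13812345678 beijing
13987654321 shanghai

the mapper emits each full line as a key, the shuffle groups the two identical lines under a single key, and the reducer writes each key once, so the output is

13812345678 beijing
13987654321 shanghai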
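If uniqueness should be decided by a single field (say, the phone number) rather than the whole line, the mapper can emit just that field as the key. The sketch below is not part of the original code; it is a hedged variant that assumes a hypothetical tab-separated layout with the phone number in the first column, so adjust the split and index to the real data.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical variant: deduplicate by one field instead of the whole record.
public class PhoneFieldMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Assumed layout: phoneNumber \t otherFields... (assumption, not from the source)
        String[] fields = value.toString().split("\t");
        if (fields.length > 0 && !fields[0].isEmpty()) {
            outKey.set(fields[0]);                     // key on the phone number only
            context.write(outKey, NullWritable.get());
        }
    }
}

The same MyReducer above (write each grouped key once) works unchanged with this mapper.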