package xlzx.mapreduce; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class Distinct { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{ if(args.length < 3){ System.out.println("There can be only three input and output parameter"); } Configuration configuration = new Configuration(); Job job = Job.getInstance(configuration, "data Distinct"); job.setJarByClass(Distinct.class); job.setMapperClass(WordMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(WordReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileInputFormat.setInputPaths(job, new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[2])); System.exit(job.waitForCompletion(true) ? 0 : 1); } public static class WordMapper extends Mapper<LongWritable, Text, Text, Text>{ protected void map(LongWritable key, Text value1, Context context) throws IOException, InterruptedException{ String line = value1.toString(); context.write(new Text(line), new Text("")); } } public static class WordReducer extends Reducer<Text, Text, Text, Text>{ protected void reduce(Text key, Iterable<Text> value2, Context context) throws IOException, InterruptedException{ context.write(key, new Text("")); } } }
MapReduce去重~Java
最新推荐文章于 2021-02-27 11:18:38 发布