reduce:对于众多的 key,分布式框架先按 key 进行分发,然后按照我们设计的计算方式对每组值求出结果。
package com.tem;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

import com.exercise.hadoop.command.HadoopManipulate;

/**
 * Driver for the "max temperature" MapReduce job (old {@code org.apache.hadoop.mapred} API).
 * Wires {@link MaxTemperatureMapper} and {@link MaxTemperatureReducer} together, runs the
 * job synchronously, then prints the reducer's output file via {@code HadoopManipulate}.
 */
public class MaxTemperature {

    /** Shared Hadoop configuration; kept public/static to preserve the original interface. */
    public static Configuration config;

    static {
        config = new Configuration();
    }

    /**
     * Submits the job and echoes the result.
     *
     * @param args optional paths: {@code args[0]} = input, {@code args[1]} = output.
     *             When not exactly two arguments are given, falls back to the original
     *             hard-coded defaults {@code /t1/test} and {@code /z2}.
     * @throws IOException if job submission or result reading fails
     */
    public static void main(String[] args) throws IOException {
        // Generalized: honor command-line paths when supplied, otherwise keep the
        // original hard-coded defaults so existing invocations behave unchanged.
        String inputPath = (args.length == 2) ? args[0] : "/t1/test";
        String outputPath = (args.length == 2) ? args[1] : "/z2";

        JobConf conf = new JobConf(MaxTemperature.class);
        conf.setJobName("max temperature");

        FileInputFormat.addInputPath(conf, new Path(inputPath));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));

        conf.setMapperClass(MaxTemperatureMapper.class);
        conf.setReducerClass(MaxTemperatureReducer.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Blocks until the job completes (or throws on failure).
        JobClient.runJob(conf);

        // Derive the result file from the chosen output path instead of duplicating
        // the literal "/z2" (the original hard-coded "/z2/part-00000").
        HadoopManipulate hm = new HadoopManipulate();
        hm.readFile(outputPath + "/part-00000");
    }
}
package com.tem;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Reducer that collapses all values of a key down to the single largest integer.
 * Each incoming value is the textual form of an integer produced by the mapper.
 */
public class MaxTemperatureReducer extends MapReduceBase
        implements Reducer<Text, Text, Text, Text> {

    /**
     * Emits one pair per key: (key, maximum of all its values).
     *
     * @param key      the grouping key
     * @param values   textual integers to take the maximum over
     * @param output   receives the (key, max) result
     * @param reporter progress reporter (unused)
     * @throws IOException if emitting the result fails
     */
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        int best = Integer.MIN_VALUE;
        while (values.hasNext()) {
            int candidate = Integer.parseInt(values.next().toString());
            if (candidate > best) {
                best = candidate;
            }
        }
        output.collect(key, new Text(String.valueOf(best)));
    }
}
// Article note (translated): Mapper — Hadoop's powerful statistics tool. After our big
// data is written to files in the agreed format, keys are grouped/merged and handed to reduce.
package com.tem;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Mapper that parses lines of the form {@code "<key> <value>"} (single-space separated)
 * and emits them as (Text, Text) pairs. Lines that do not split into exactly two tokens
 * are silently skipped.
 */
public class MaxTemperatureMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, Text> {

    /**
     * @param key      byte offset of the line in the input file (unused)
     * @param value    one line of input text
     * @param output   receives (first token, second token)
     * @param reporter progress reporter (unused)
     * @throws IOException if emitting the pair fails
     */
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        String line = value.toString();
        // Split once; the original recomputed line.split(" ") three times per record.
        String[] parts = line.split(" ");
        if (parts.length == 2) {
            output.collect(new Text(parts[0]), new Text(parts[1]));
        }
    }
}
MapReduce 简单实现统计
最新推荐文章于 2021-12-29 15:41:45 发布