// Use Hadoop MapReduce to find the three largest values in the input data.
package mr;
import java.net.URI;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MyTopKClass {
static class MyTopKMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
int[] a=new int[3];
public void map(LongWritable k1, Text v1, Context context)
throws java.io.IOException, java.lang.InterruptedException
{
String lines= v1.toString().trim();
if(lines.length()==0){return;}
Arrays.sort(a);
int input=Integer.parseInt(lines);
if(a[0]<input){
a[0]=input;
}
System.out.println("map......");
}
protected void cleanup(Context context) throws java.io.IOException, java.lang.InterruptedException{
for(int x:a)
context.write(new Text(x+""), NullWritable.get());
}
}
private static String INPUT_PATH="hdfs://master:9000/input/topk.dat";
private static String OUTPUT_PATH="hdfs://master:9000/output/c/";
public static void main(String[] args) throws Exception {
Configuration conf=new Configuration();
FileSystem fs=FileSystem.get(new URI(OUTPUT_PATH),conf);
if(fs.exists(new Path(OUTPUT_PATH)))
fs.delete(new Path(OUTPUT_PATH));
Job job=new Job(conf,"myjob");
job.setJarByClass(MyTopKClass.class);
job.setMapperClass(MyTopKMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
FileInputFormat.addInputPath(job,new Path(INPUT_PATH));
FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
job.waitForCompletion(true);
}
}