// Use Hadoop MapReduce to find the three largest values in the input data.
package mr;
import java.net.URI;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MyTopKClass {
static class MyTopKMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
int[] a=new int[3];
public void map(LongWritable k1, Text v1, Context context)
throws java.io.IOException, java.lang.InterruptedException
{
String lines= v1.toString().trim();
if(lines.length()==0){return;}
Arrays.sort(a);
int input=Integer.parseInt(lines);
if(a[0]<input){
a[0]=input;
}
System.out.println("map......");
}
protected void cleanup(Context context) throws java.io.IOException, java.lang.InterruptedException{
for(int x:a)
context.write(new Text(x+""), NullWritable.get());
}
}
private static String INPUT_PATH="hdfs://master:9000/input/topk.dat";
private static String OUTPUT_PATH="hdfs://master:9000/output/c/";
public static void main(String[] args) throws Exception {
Configuration conf=new Configuration();
FileSystem fs=FileSystem.get(new URI(OUTPUT_PATH),conf);
if(fs.exists(new Path(OUTPUT_PATH)))
fs.delete(new Path(OUTPUT_PATH));
Job job=new Job(conf,"myjob");
job.setJarByClass(MyTopKClass.class);
job.setMapperClass(MyTopKMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
FileInputFormat.addInputPath(job,new Path(INPUT_PATH));
FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
job.waitForCompletion(true);
}
}