MapReduce 模板

最新推荐文章于 2021-02-20 21:32:04 发布

我_是好人

最新推荐文章于 2021-02-20 21:32:04 发布

阅读量274

点赞数

本文链接：https://blog.csdn.net/qq_34952846/article/details/80461160

版权

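好久没有更新博客了。最近复习了一下 MapReduce，发现有些代码已经记不清了，所以整理了一份模板，方便以后查阅。下面的示例由两个类组成：SortCount（作业驱动类，内含 Mapper 和 Reducer）以及 InfoBean（自定义的 WritableComparable 键，用于按 sum 排序）。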


import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SortCount
{
  public static class SortMapper
    extends Mapper<LongWritable, Text, InfoBean, NullWritable>   //map方法
  {
    private InfoBean i = new InfoBean();
    
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, InfoBean, NullWritable>.Context context)
      throws IOException, InterruptedException
    {
      String line = value.toString();
      try
      {
        String[] fields = line.split("\\s+");    //正则表达式
        String name = fields[0];
        long sum = Long.parseLong(fields[1]);
        this.i.set(sum, name);
        context.write(this.i, NullWritable.get());  
      }
      catch (Exception e)
      {
        e.printStackTrace();
      }
    }
  }
  
  public static class SortReducer
    extends Reducer<InfoBean, NullWritable, Text, InfoBean>  //reduce方法
  {
    private Text t = new Text();
    private InfoBean i = new InfoBean();
    
    protected void reduce(InfoBean key, Iterable<NullWritable> values, Reducer<InfoBean, NullWritable, Text, InfoBean>.Context context)
      throws IOException, InterruptedException
    {
      String name = key.getName();
      this.t.set(name);
      this.i.set(key.getSum(), "");
      context.write(this.t, this.i);
    }
  }
  
  public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException
  {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    
    job.setJarByClass(SortCount.class);
    
    job.setMapperClass(SortMapper.class);
    job.setMapOutputKeyClass(InfoBean.class);
    job.setMapOutputValueClass(NullWritable.class);
    FileInputFormat.setInputPaths(job, new Path[] { new Path(args[0]) });
    
    job.setReducerClass(SortReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InfoBean.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
    job.waitForCompletion(true);
  }
}

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

public class InfoBean
  implements WritableComparable<InfoBean>  //调用WritableComparable接口（其中包含排序），也可以调用Writable接口
{
  private long sum;
  private String name;
  
  public void set(long sum, String name)
  {
    this.sum = sum;
    this.name = name;
  }
  
  public String toString()
  {
    return this.name + this.sum;
  }
  
  public long getSum()
  {
    return this.sum;
  }
  
  public void setSum(long sum)
  {
    this.sum = sum;
  }
  
  public String getName()
  {
    return this.name;
  }
  
  public void setName(String name)
  {
    this.name = name;
  }
  
  public void write(DataOutput out)
    throws IOException
  {
    out.writeUTF(this.name);
    out.writeLong(this.sum);
  }
  
  public void readFields(DataInput in)
    throws IOException
  {
    this.name = in.readUTF();
    this.sum = in.readLong();
  }
  
  public int compareTo(InfoBean arg0)
  {
    return this.sum > arg0.getSum() ? -1 : 1;
  }
}