好久没有更新博客了。最近复习了一下 MapReduce,发现有些代码已经记不清了,所以在这里整理一份模板。
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job that reads whitespace-separated {@code name sum} lines and
 * writes them back out keyed by name, using {@link InfoBean} as the map
 * output key so that the records are ordered by the bean's {@code compareTo}.
 *
 * <p>Usage: {@code SortCount <input path> <output path>}.
 */
public class SortCount
{
    /**
     * Map phase: parses each input line into an {@link InfoBean} and emits it
     * as the key with a {@link NullWritable} value.
     */
    public static class SortMapper
        extends Mapper<LongWritable, Text, InfoBean, NullWritable>
    {
        /** Field separator, compiled once instead of once per record. */
        private static final java.util.regex.Pattern WHITESPACE =
            java.util.regex.Pattern.compile("\\s+");

        /** Counter group and name used to report lines that could not be parsed. */
        private static final String COUNTER_GROUP = "SortCount";
        private static final String MALFORMED_LINES = "MALFORMED_LINES";

        /** Reused output key; it is overwritten for every record. */
        private InfoBean outKey = new InfoBean();

        /**
         * Splits the line on whitespace, takes field 0 as the name and field 1
         * as the sum, and writes the resulting bean.
         *
         * <p>Lines with fewer than two fields or a non-numeric second field are
         * skipped (counted and logged to stderr). Failures raised by
         * {@code context.write} are NOT swallowed: they propagate to the caller.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used to emit output and report counters
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, InfoBean, NullWritable>.Context context)
            throws IOException, InterruptedException
        {
            String line = value.toString();
            String[] fields = WHITESPACE.split(line);
            if (fields.length < 2)
            {
                reportMalformed(context, line, "expected at least two fields");
                return;
            }

            long sum;
            try
            {
                sum = Long.parseLong(fields[1]);
            }
            catch (NumberFormatException e)
            {
                // Only parse failures are treated as "bad input"; everything
                // else must surface so the task fails visibly.
                reportMalformed(context, line, "second field is not a number");
                return;
            }

            this.outKey.set(sum, fields[0]);
            context.write(this.outKey, NullWritable.get());
        }

        /** Records a skipped input line in a job counter and on stderr. */
        private void reportMalformed(Mapper<LongWritable, Text, InfoBean, NullWritable>.Context context, String line, String reason)
        {
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1L);
            System.err.println("Skipping malformed line (" + reason + "): " + line);
        }
    }

    /**
     * Reduce phase: turns each incoming {@link InfoBean} key into a
     * {@code (name, bean)} output pair, where the output bean carries the sum
     * and an empty name.
     */
    public static class SortReducer
        extends Reducer<InfoBean, NullWritable, Text, InfoBean>
    {
        /** Reused output key holding the name. */
        private Text outName = new Text();
        /** Reused output value holding the sum (its name is set to ""). */
        private InfoBean outValue = new InfoBean();

        /**
         * Emits one output record per value in the group, so that records
         * whose keys compare equal are all written rather than collapsed
         * into a single line.
         *
         * @param key     the bean produced by the mapper
         * @param values  the placeholder values grouped under {@code key}
         * @param context task context used to emit output
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        protected void reduce(InfoBean key, Iterable<NullWritable> values, Reducer<InfoBean, NullWritable, Text, InfoBean>.Context context)
            throws IOException, InterruptedException
        {
            for (NullWritable ignored : values)
            {
                this.outName.set(key.getName());
                // The name is already the output key, so the value only keeps the sum.
                this.outValue.set(key.getSum(), "");
                context.write(this.outName, this.outValue);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a configured job class cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException
    {
        if (args.length < 2)
        {
            System.err.println("Usage: SortCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortCount.class);

        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(InfoBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(InfoBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Surface job failure through the process exit status instead of
        // silently returning as if the job had succeeded.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded)
        {
            System.exit(1);
        }
    }
}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Serializable (name, sum) pair that can be used as a sortable key.
 *
 * <p>Implements {@code WritableComparable}, which adds ordering on top of the
 * plain {@code Writable} serialization methods. The natural ordering is by
 * {@code sum} descending, with ties broken by {@code name} ascending.
 *
 * <p>Instances are mutable and are created through the implicit no-argument
 * constructor; populate them with {@link #set(long, String)} or the setters.
 */
public class InfoBean
    implements WritableComparable<InfoBean>
{
    private long sum;
    private String name;

    /**
     * Sets both fields at once.
     *
     * @param sum  the numeric value used as the primary sort key
     * @param name the name associated with the sum
     */
    public void set(long sum, String name)
    {
        this.sum = sum;
        this.name = name;
    }

    /** Returns the name immediately followed by the sum, with no separator. */
    @Override
    public String toString()
    {
        return this.name + this.sum;
    }

    public long getSum()
    {
        return this.sum;
    }

    public void setSum(long sum)
    {
        this.sum = sum;
    }

    public String getName()
    {
        return this.name;
    }

    public void setName(String name)
    {
        this.name = name;
    }

    /**
     * Serializes the bean as the name (modified UTF-8) followed by the sum.
     * The field order must match {@link #readFields(DataInput)}.
     *
     * <p>The name is passed to {@code writeUTF} as-is, so callers are expected
     * to have set a non-null name before serializing.
     *
     * @param out destination to write to
     * @throws IOException if the underlying output fails
     */
    public void write(DataOutput out)
        throws IOException
    {
        out.writeUTF(this.name);
        out.writeLong(this.sum);
    }

    /**
     * Restores the bean from the layout produced by {@link #write(DataOutput)}:
     * name first, then sum.
     *
     * @param in source to read from
     * @throws IOException if the underlying input fails
     */
    public void readFields(DataInput in)
        throws IOException
    {
        this.name = in.readUTF();
        this.sum = in.readLong();
    }

    /**
     * Orders beans by sum, largest first; beans with equal sums are ordered by
     * name (a null name sorts before any non-null name).
     *
     * <p>Returns 0 only when both sum and name are equal, which keeps the
     * ordering antisymmetric and consistent with {@link #equals(Object)}.
     *
     * @param arg0 the bean to compare against
     * @return a negative value, zero, or a positive value as this bean sorts
     *         before, equal to, or after {@code arg0}
     */
    public int compareTo(InfoBean arg0)
    {
        // Arguments are swapped on purpose: larger sums sort first.
        int bySum = Long.compare(arg0.sum, this.sum);
        if (bySum != 0)
        {
            return bySum;
        }
        return compareNames(this.name, arg0.name);
    }

    /** Null-safe ascending string comparison; null sorts first. */
    private static int compareNames(String left, String right)
    {
        if (left == null)
        {
            return right == null ? 0 : -1;
        }
        if (right == null)
        {
            return 1;
        }
        return left.compareTo(right);
    }

    /** Two beans are equal when both their sums and their names are equal. */
    @Override
    public boolean equals(Object obj)
    {
        if (this == obj)
        {
            return true;
        }
        if (!(obj instanceof InfoBean))
        {
            return false;
        }
        InfoBean other = (InfoBean) obj;
        boolean sameName = this.name == null ? other.name == null : this.name.equals(other.name);
        return this.sum == other.sum && sameName;
    }

    /** Hash derived from the same fields as {@link #equals(Object)}. */
    @Override
    public int hashCode()
    {
        int nameHash = this.name == null ? 0 : this.name.hashCode();
        return 31 * Long.hashCode(this.sum) + nameHash;
    }
}