1 Mapper
/**
 * Mapper that emits a composite {@link TextInt} key together with an integer value.
 *
 * <p>For each record, the text of the input value is parsed as an int. The string
 * form of the input key becomes the string part of the composite key, and the parsed
 * int becomes both the numeric part of the composite key and the output value.
 */
public class SortMapper extends Mapper<Object, Text, TextInt, IntWritable> {
    /** Output key instance, reused and overwritten on every call to {@code map}. */
    public TextInt textInt = new TextInt();
    /** Output value instance, reused and overwritten on every call to {@code map}. */
    public IntWritable intp = new IntWritable(0);

    /**
     * Parses the record value as an int and writes (key string + int, int) to the context.
     *
     * @param key     input key; only its {@code toString()} form is used
     * @param value   input value whose text must be a parsable int
     * @param context context that receives the output pair
     * @throws NumberFormatException if the value text is not a valid int
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        final int number = Integer.parseInt(value.toString());
        final String label = key.toString();

        // Populate the reusable output objects before emitting them.
        intp.set(number);
        textInt.setValue(number);
        textInt.setStr(label);

        context.write(textInt, intp);
    }
}
2 Partitioner
/**
 * Partitioner that routes records by the string part of the composite key only,
 * so that all records sharing the same string reach the same reducer regardless
 * of the numeric part of the key.
 */
public class SortPartitioner extends Partitioner<TextInt, IntWritable> {
    /**
     * Computes the partition index from the hash of the key's string part.
     *
     * @param textInt     composite key; only {@code getStr()} is consulted
     * @param value       record value (unused)
     * @param numReducers number of partitions
     * @return an index in the range {@code [0, numReducers)}
     */
    @Override
    public int getPartition(TextInt textInt, IntWritable value, int numReducers) {
        // Parentheses are required: '%' binds tighter than '&', so without them the
        // expression would be hash & (Integer.MAX_VALUE % numReducers), which is a
        // bit-mask rather than a modulo and distributes keys unevenly.
        // Masking with Integer.MAX_VALUE clears the sign bit so the remainder is
        // never negative.
        return (textInt.getStr().hashCode() & Integer.MAX_VALUE) % numReducers;
    }
}
3 Reducer
/**
 * Reducer that concatenates all integer values of a group into one
 * comma-separated string and writes it under the string part of the group key.
 */
public class SortReducer extends Reducer<TextInt, IntWritable, Text, Text> {
    /**
     * Joins the values with "," in iteration order and emits
     * (key string, joined values).
     *
     * @param textInt composite key of the group; only {@code getStr()} is written out
     * @param values  the integer values belonging to this group
     * @param context context that receives the output pair
     */
    @Override
    protected void reduce(TextInt textInt, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        for (IntWritable current : values) {
            // Put the separator in front of every element except the first,
            // which yields the same text as appending "," and trimming the last one.
            if (joined.length() > 0) {
                joined.append(',');
            }
            joined.append(current.get());
        }

        Text outKey = new Text(textInt.getStr());
        Text outValue = new Text(joined.toString());
        context.write(outKey, outValue);
    }
}
4 自定义数据类型TextInt
/**
 * Composite key made of a string and an int.
 *
 * <p>Serialized as the string (via {@code writeUTF}) followed by the int.
 * The natural ordering defined by {@link #compareTo(TextInt)} looks only at the
 * string part and is the reverse of {@link String#compareTo(String)}.
 */
public class TextInt implements WritableComparable<TextInt> {
    /** String part of the key. */
    private String str;
    /** Integer part of the key. */
    private int value;

    /** @return the string part of the key */
    public String getStr() {
        return this.str;
    }

    /** @param str the new string part of the key */
    public void setStr(String str) {
        this.str = str;
    }

    /** @return the integer part of the key */
    public int getValue() {
        return this.value;
    }

    /** @param value the new integer part of the key */
    public void setValue(int value) {
        this.value = value;
    }

    /**
     * Writes the string part followed by the integer part.
     *
     * @param out destination of the serialized form
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.str);
        out.writeInt(this.value);
    }

    /**
     * Restores both parts in the same order in which {@code write} emits them.
     *
     * @param in source of the serialized form
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.str = in.readUTF();
        this.value = in.readInt();
    }

    /**
     * Compares by string part only, with the operands swapped, so a key with a
     * lexicographically larger string orders before one with a smaller string.
     *
     * @param o the key to compare against
     * @return the result of comparing {@code o}'s string to this key's string
     */
    @Override
    public int compareTo(TextInt o) {
        final String theirs = o.getStr();
        final String mine = this.getStr();
        return theirs.compareTo(mine);
    }
}
6 自定义的Mapper端的排序比较类
/**
 * Comparator for {@link TextInt} keys that orders by the string part first
 * (ascending) and, for equal strings, by the integer part (ascending).
 */
public class TextIntComparator extends WritableComparator {
    /** Registers {@link TextInt} as the key class, passing {@code true} as the second base-class argument. */
    public TextIntComparator() {
        super(TextInt.class, true);
    }

    /**
     * Compares two {@link TextInt} keys by string, then by integer value.
     *
     * @param a first key; must be a {@link TextInt}
     * @param b second key; must be a {@link TextInt}
     * @return negative, zero, or positive as {@code a} orders before, equal to,
     *         or after {@code b}
     */
    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        TextInt o1 = (TextInt) a;
        TextInt o2 = (TextInt) b;

        int byStr = o1.getStr().compareTo(o2.getStr());
        if (byStr != 0) {
            return byStr;
        }
        // Integer.compare instead of subtraction: o1 - o2 overflows when the
        // operands have opposite signs and large magnitude, flipping the order.
        return Integer.compare(o1.getValue(), o2.getValue());
    }
}
7 自定义的Reducer端的分组比较类(通过setGroupingComparatorClass设置,只按字符串部分分组)
/**
 * Comparator for {@link TextInt} keys that looks only at the string part,
 * so keys with the same string compare as equal whatever their integer part is.
 */
public class TextComparator extends WritableComparator {
    /** Registers {@link TextInt} as the key class, passing {@code true} as the second base-class argument. */
    public TextComparator() {
        super(TextInt.class, true);
    }

    /**
     * Compares the string parts of two {@link TextInt} keys.
     *
     * @param a first key; must be a {@link TextInt}
     * @param b second key; must be a {@link TextInt}
     * @return the result of {@link String#compareTo(String)} on the two string parts
     */
    @Override
    @SuppressWarnings("all")
    public int compare(WritableComparable a, WritableComparable b) {
        final String left = ((TextInt) a).getStr();
        final String right = ((TextInt) b).getStr();
        return left.compareTo(right);
    }
}
8 驱动程序
public class SortMain {
public static void main(String[] args) throws IOException{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length !=2 )
{
System.err.println("Usage:sort <int><out>");
System.exit(2);
}
Job job = new Job(conf,"sort");
job.setJarByClass(SortMain.class);
job.setInputFormatClass(KeyValueTextInputFormat.class);
job.setMapperClass(SortMapper.class);
job.setPartitionerClass(SortPartitioner.class);
job.setMapOutputKeyClass(TextInt.class);
job.setMapOutputValueClass(IntWritable.class);
job.setReducerClass(SortReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setSortComparatorClass(TextIntComparator.class);
job.setGroupingComparatorClass(TextComparator.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
try {
System.exit(job.waitForCompletion(true) ? 0 : 1);
} catch (InterruptedException e) {
e.printStackTrace();
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
}
9 运行和效果
注意:必须在驱动程序中调用setMapOutputKeyClass和setMapOutputValueClass。如果不设置,mapper输出的key和value类型默认与作业最终输出类型(setOutputKeyClass/setOutputValueClass)相同,本例中即Text和Text,与Mapper实际输出的TextInt和IntWritable不一致。