package com.simple;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class IntPair implements WritableComparable<IntPair> {
private int first;
private int second;
// Hadoop needs a no-argument constructor so it can create IntPair instances during deserialization
public IntPair() {
super();
}
public IntPair(int first, int second) {
super();
this.first = first;
this.second = second;
}
public int getFirst() {
return first;
}
public void setFirst(int first) {
this.first = first;
}
public int getSecond() {
return second;
}
public void setSecond(int second) {
this.second = second;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + first;
result = prime * result + second;
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
IntPair other = (IntPair) obj;
if (first != other.first)
return false;
if (second != other.second)
return false;
return true;
}
@Override
public String toString() {
return "IntPair [first=" + first + ", second=" + second + "]";
}
@Override
public int compareTo(IntPair intPair) {
// Compare the first fields; when they differ, the first field decides the order
if (first != intPair.first) {
return first > intPair.first ? 1 : -1;
} else {
// When the first fields are equal, compare the second fields (returns 0 when both pairs are equal)
return Integer.compare(second, intPair.second);
}
}
@Override
// readFields reads the two fields back during deserialization, in the same order write() wrote them
public void readFields(DataInput in) throws IOException {
this.first=in.readInt();
this.second=in.readInt();
}
@Override
// write serializes the two fields during the shuffle
public void write(DataOutput out) throws IOException {
out.writeInt(first);
out.writeInt(second);
}
}
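The composite key can be sanity-checked outside the cluster. The sketch below (a hypothetical helper, not part of the job; the class name IntPairCheck and the sample numbers are assumptions) round-trips an IntPair through write()/readFields() and exercises compareTo:
package com.simple;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class IntPairCheck {
    public static void main(String[] args) throws IOException {
        // Serialize an IntPair into an in-memory buffer, the way Hadoop does during the shuffle
        IntPair original = new IntPair(3, 7);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));
        // Deserialize into a fresh instance created through the no-argument constructor
        IntPair copy = new IntPair();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy); // IntPair [first=3, second=7]
        // compareTo orders by the first field, then by the second
        System.out.println(new IntPair(1, 9).compareTo(new IntPair(2, 0)) < 0); // true
        System.out.println(new IntPair(1, 9).compareTo(new IntPair(1, 5)) > 0); // true
        System.out.println(new IntPair(1, 9).compareTo(new IntPair(1, 9)));     // 0
    }
}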
package com.simple;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/*
* Partitioner class:
* partitions map output by the first field of the composite key.
*/
public class FirstPartitioner extends Partitioner<IntPair, Text> {
@Override
public int getPartition(IntPair key, Text value, int numPartitions) {
// Multiply the first field of the key by 127, take the absolute value, then mod by numPartitions,
// which should match the number of reduce tasks
return Math.abs(key.getFirst() * 127) % numPartitions;
}
}
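For illustration only (the class name PartitionCheck, the choice of two partitions, and the sample keys are assumptions), the partitioner sends every record with the same first field to the same reduce task:
package com.simple;
import org.apache.hadoop.io.Text;
public class PartitionCheck {
    public static void main(String[] args) {
        FirstPartitioner partitioner = new FirstPartitioner();
        int numPartitions = 2; // must match the configured number of reduce tasks
        // Keys sharing first = 20 land in the same partition; a different first field may land elsewhere
        System.out.println(partitioner.getPartition(new IntPair(20, 21), new Text("20 21"), numPartitions)); // 0
        System.out.println(partitioner.getPartition(new IntPair(20, 53), new Text("20 53"), numPartitions)); // 0
        System.out.println(partitioner.getPartition(new IntPair(31, 42), new Text("31 42"), numPartitions)); // 1
    }
}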
package com.simple;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/*
* Grouping comparator class: puts all records that share the same first field into one reduce() call
*/
public class GroupingComparator extends WritableComparator {
// A no-argument constructor is required; super(IntPair.class, true) registers the key class
// and lets the comparator create IntPair instances for comparison
protected GroupingComparator() {
super(IntPair.class, true);
}
// Override compare: group composite keys by the first (natural) key only
@SuppressWarnings("rawtypes")
@Override
public int compare(WritableComparable w1, WritableComparable w2) {
IntPair ip1 = (IntPair) w1;
IntPair ip2 = (IntPair) w2;
return Integer.compare(ip1.getFirst(), ip2.getFirst());
}
}
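The difference between the sort order and the grouping can be shown directly (GroupingCheck and the sample values are assumptions; calling the protected constructor works because the class sits in the same package): compareTo still separates (20, 21) and (20, 53), while the grouping comparator treats them as one key, so their values reach a single reduce() call.
package com.simple;
public class GroupingCheck {
    public static void main(String[] args) {
        IntPair a = new IntPair(20, 21);
        IntPair b = new IntPair(20, 53);
        // Sort comparison: a comes before b, so values arrive in ascending order of the second field
        System.out.println(a.compareTo(b) < 0); // true
        // Grouping comparison: the first fields match, so both records belong to the same reduce group
        System.out.println(new GroupingComparator().compare(a, b)); // 0
    }
}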
package com.simple;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class SecondarySortMapper extends Mapper<LongWritable, Text, IntPair, Text> {
private final IntPair keyPair = new IntPair();
String[] lineArr = null;
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// Split the line on a single space, store the first field in keyPair.first and the second
// field in keyPair.second, then emit keyPair as the key and the original line as the value
String line = value.toString();
lineArr = line.split(" ", -1);
keyPair.setFirst(Integer.parseInt(lineArr[0]));
keyPair.setSecond(Integer.parseInt(lineArr[1]));
context.write(keyPair, value);
}
}
package com.simple;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class SecondarySortReducer extends Reducer<IntPair, Text, Text, Text> {
private static final Text SEPARATOR = new Text("---------------------");
@Override
public void reduce(IntPair key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
// Write a separator line before each IntPair group so the group boundaries are visible in the output
context.write(SEPARATOR, null);
// Iterate over the values of this group and write each original line
for (Text val : values) {
context.write(null, val);
}
}
}
package com.simple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class SecondarySortJob {
public static void main(String[] args) throws Exception {
// Get the job object
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://localhost:9000");
Job job = Job.getInstance(conf);
// Set the main (driver) class
job.setJarByClass(SecondarySortJob.class);
// Set the mapper, reducer and map output types
job.setMapperClass(SecondarySortMapper.class);
job.setReducerClass(SecondarySortReducer.class);
job.setMapOutputKeyClass(IntPair.class);
job.setMapOutputValueClass(Text.class);
// Set the custom partitioner
job.setPartitionerClass(FirstPartitioner.class);
// Set the grouping comparator
job.setGroupingComparatorClass(GroupingComparator.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
// Set the job input and output paths
FileInputFormat.setInputPaths(job, new Path("/SecondarySort.txt"));
FileOutputFormat.setOutputPath(job, new Path("/simple/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
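To make the whole flow concrete, here is a small hypothetical input file and the output the job would produce with the default single reduce task (the numbers are illustrative, and the result follows from the logic above): records are grouped by the first number, the second number is sorted ascending inside each group, and a separator line precedes every group.
Sample /SecondarySort.txt (hypothetical):
20 5
40 20
20 4
40 10
30 1
40 30
Resulting part-r-00000:
---------------------
20 4
20 5
---------------------
30 1
---------------------
40 10
40 20
40 30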