二次排序,对value进行排序。代码都是自己亲测的可用的。
总结:分四步
1.先自定义数据类型,我这里是CombaKey(说的通俗一点,就是把value的值也放到key中来排序),也可以叫组合key。
2.分区类
3.排序对比器
4.分组对比器
输入:
123,13
132,14
123,15
123,12
132,15
输出:
123,12
123,13
123,15
132,14
132,15
现在按照步骤来做:
1.自定义数据类型,组合key。
/*
组合key 类
*/
/**
 * Composite key for secondary sort: carries both the grouping field
 * (number1) and the value field (number2) so the MapReduce shuffle can
 * sort on both. Ordering: number1 ascending, then number2 ascending,
 * which matches the sample output listed above (123,12 / 123,13 / 123,15).
 */
public class CombaKey implements WritableComparable<CombaKey> {
    private int number1;
    private int number2;

    public int getNumber1() {
        return number1;
    }

    public void setNumber1(int number1) {
        this.number1 = number1;
    }

    public int getNumber2() {
        return number2;
    }

    public void setNumber2(int number2) {
        this.number2 = number2;
    }

    /**
     * Compares by number1 ascending, then number2 ascending.
     *
     * Fixes two defects in the original:
     * 1. It returned -(number2 - onumber2), i.e. number2 DESCENDING, which
     *    contradicts the expected output documented above.
     * 2. Subtraction-based comparison overflows for large-magnitude ints;
     *    Integer.compare is overflow-safe.
     */
    @Override
    public int compareTo(CombaKey o) {
        int byNumber1 = Integer.compare(number1, o.number1);
        return byNumber1 != 0 ? byNumber1 : Integer.compare(number2, o.number2);
    }

    /** Serialization: both fields written in a fixed order. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(number1);
        dataOutput.writeInt(number2);
    }

    /** Deserialization: fields read in the same order write() emits them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.number1 = dataInput.readInt();
        this.number2 = dataInput.readInt();
    }

    /** equals consistent with compareTo (compareTo == 0 iff equals). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CombaKey)) {
            return false;
        }
        CombaKey other = (CombaKey) obj;
        return number1 == other.number1 && number2 == other.number2;
    }

    @Override
    public int hashCode() {
        return 31 * number1 + number2;
    }
}
/*
2. number1分区类 按number1分区 (按第一个字段分类)
*/
/**
 * Partitioner for step 2: routes records by number1 so every row sharing
 * the same first field reaches the same reducer partition.
 */
public class Number1_Partitioner extends Partitioner<CombaKey, NullWritable> {
    /**
     * @param numPartitions number of reduce tasks (Hadoop supplies this)
     * @return partition index in [0, numPartitions)
     */
    @Override
    public int getPartition(CombaKey key, NullWritable value, int numPartitions) {
        // Mask off the sign bit before the modulo: the original
        // "number1 % i" returns a NEGATIVE index for negative number1,
        // which Hadoop rejects at runtime.
        return (key.getNumber1() & Integer.MAX_VALUE) % numPartitions;
    }
}
/*
3. Combakey 排序对比器类
*/
/**
 * Sort comparator for step 3: the shuffle uses this to order keys, and it
 * simply delegates to CombaKey's own compareTo.
 */
public class Sort extends WritableComparator {
    public Sort() {
        // true => ask WritableComparator to instantiate CombaKey objects
        // for deserialized comparison.
        super(CombaKey.class, true);
    }

    /** Delegates ordering to the composite key itself. */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return ((CombaKey) a).compareTo((CombaKey) b);
    }
}
/*
4. 组合key分组对比器类
*/
/**
 * Grouping comparator for step 4: two composite keys belong to the same
 * reduce() call when their number1 fields match (a return value of 0
 * means "same group"); number2 is deliberately ignored here.
 */
public class Comparkey_group_Comparator extends WritableComparator {
    public Comparkey_group_Comparator() {
        super(CombaKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        CombaKey left = (CombaKey) a;
        CombaKey right = (CombaKey) b;
        // Integer.compare instead of subtraction: "x - y" overflows when
        // the operands have opposite signs and large magnitudes, silently
        // corrupting the grouping.
        return Integer.compare(left.getNumber1(), right.getNumber1());
    }
}
5. 上面该写的都写好了,现在开始跑我们的mapreduce
先写一个mapper类
Mapper类(mapper类是按行读取的)
/**
 * Mapper: invoked once per input line ("number1,number2"); parses the two
 * fields into a CombaKey and emits it with a NullWritable value so the
 * shuffle performs all the sorting.
 */
public class Map extends Mapper<LongWritable, Text, CombaKey, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split "123,13" on the comma; trim each field so a stray space
        // around a number no longer crashes Integer.parseInt.
        String[] fields = value.toString().split(",");
        CombaKey combaKey = new CombaKey();
        combaKey.setNumber1(Integer.parseInt(fields[0].trim()));
        combaKey.setNumber2(Integer.parseInt(fields[1].trim()));
        // Removed the per-record System.out.println: it floods task logs
        // and measurably slows the mapper on real data volumes.
        context.write(combaKey, NullWritable.get());
    }
}
6. Reducer类
/**
 * Reducer: thanks to the grouping comparator, one reduce() call covers all
 * records sharing a number1; iterating values advances the (mutable) key
 * through the sorted number2 values, so the key must be re-read inside
 * the loop.
 */
public class Reduce extends Reducer<CombaKey, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(CombaKey key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        System.out.println("===============================");
        for (NullWritable v : values) {
            // BUG FIX: the original passed key.getNumber2() for BOTH format
            // arguments; the first must be number1 as the label says.
            System.out.printf("key.getNumber1=%d,key.getNumber2=%d,values=%s\r\n", key.getNumber1(), key.getNumber2(), values.toString());
            context.write(new Text(key.getNumber1() + "\t" + key.getNumber2()), NullWritable.get());
        }
    }
}
7. // 最后就是Main方法了
//我这里用的是本地调式。记住包别导错了,要用hadoop下的包。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Maprereduce_main {
    /**
     * Job driver: wires together the composite key, partitioner, sort
     * comparator and grouping comparator, then runs the job locally.
     *
     * @throws Exception if HDFS access or job submission fails
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Delete a stale output directory up front so reruns don't fail
        // with "output path already exists".
        FileSystem fileSystem = FileSystem.get(configuration);
        Path path = new Path("/data/output/test");
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJobName("My_MR");
        job.setJarByClass(Maprereduce_main.class);
        // Custom mapper and reducer classes.
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        // Mapper output types: composite key, no value.
        job.setMapOutputKeyClass(CombaKey.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Secondary-sort plumbing: partition by number1, sort on the full
        // composite key, group reduce() calls by number1 only.
        job.setPartitionerClass(Number1_Partitioner.class);
        job.setSortComparatorClass(Sort.class);
        job.setGroupingComparatorClass(Comparkey_group_Comparator.class);
        // Input and output paths (fill in for your environment).
        FileInputFormat.addInputPath(job, new Path("这里写你的文件输入路径"));
        FileOutputFormat.setOutputPath(job, new Path("文件输出路径"));
        // BUG FIX: the original discarded waitForCompletion's return value,
        // so the process exited 0 even when the job failed. Propagate it.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
完事了。一个简单的 MapReduce 二次排序就搞定了。