Over the past few days I've been studying MapReduce, and I tried writing a MapReduce program that implements secondary sort.
Without further ado, let's start by defining a custom key type. Remember that the key type must implement the WritableComparable interface, which means providing write, readFields, and compareTo; we also override toString so the output is readable.
Here is the finished code:
package com.soft;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class InPair implements WritableComparable<InPair> {
    private IntWritable first;
    private IntWritable second;

    public void set(IntWritable first, IntWritable second) {
        this.first = first;
        this.second = second;
    }

    // The no-arg constructor is required: Hadoop instantiates the key via
    // reflection during deserialization, so omitting it causes an error.
    public InPair() {
        set(new IntWritable(), new IntWritable());
    }

    // Convenience constructor that wraps primitive ints in IntWritables.
    public InPair(int first, int second) {
        set(new IntWritable(first), new IntWritable(second));
    }

    public InPair(IntWritable first, IntWritable second) {
        set(first, second);
    }

    public void setFirst(IntWritable first) {
        this.first = first;
    }

    public void setSecond(IntWritable second) {
        this.second = second;
    }

    public IntWritable getFirst() {
        return first;
    }

    public IntWritable getSecond() {
        return second;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        second.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second.readFields(in);
    }

    @Override
    public String toString() {
        return first + "\t" + second;
    }

    // Overriding equals is optional here (reduce-side grouping uses compareTo,
    // not equals); if you do override it, also override hashCode, which the
    // default HashPartitioner relies on.
    /*
    public boolean equals(Object o) {
        if (o instanceof InPair) {
            InPair tp = (InPair) o;
            return first.equals(tp.first) && second.equals(tp.second);
        }
        return false;
    }
    */

    // Override compareTo to implement the secondary sort:
    // compare on first; if equal, break the tie with second.
    @Override
    public int compareTo(InPair tp) {
        int cmp = first.compareTo(tp.first);
        if (cmp != 0) {
            return cmp;
        }
        return second.compareTo(tp.second);
    }
}
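Before wiring this into a job, it's easy to sanity-check the ordering that compareTo defines with a plain local sort, no cluster needed. Here is a minimal sketch with made-up sample values; the class name InPairSortDemo is just a hypothetical helper, not part of the job:

package com.soft;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class InPairSortDemo {
    public static void main(String[] args) {
        // A few hypothetical pairs, only to illustrate the ordering.
        List<InPair> pairs = new ArrayList<InPair>();
        pairs.add(new InPair(3, 7));
        pairs.add(new InPair(1, 9));
        pairs.add(new InPair(3, 1));
        // Collections.sort uses the compareTo defined above.
        Collections.sort(pairs);
        for (InPair p : pairs) {
            System.out.println(p); // toString prints first<TAB>second
        }
        // Resulting order: (1,9), (3,1), (3,7):
        // ascending by first, ties broken by second.
    }
}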
With the custom key type in place, next up are the Mapper, the Reducer, and the driver:
package com.soft;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class soft {
    // Note: the Mapper's input types must be LongWritable and Text (the byte
    // offset of each line and the line's content); getting these wrong causes
    // a type-mismatch error.
    public static class map1 extends Mapper<LongWritable, Text, InPair, NullWritable> {
        private IntWritable key1 = new IntWritable();
        private IntWritable key2 = new IntWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] list = line.split("\t");
            key1.set(Integer.parseInt(list[0]));
            key2.set(Integer.parseInt(list[1]));
            // Wrap both fields into the composite key; the value is unused.
            context.write(new InPair(key1, key2), NullWritable.get());
        }
    }

    public static class reduce1 extends Reducer<InPair, NullWritable, InPair, NullWritable> {
        @Override
        public void reduce(InPair key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
    // I think the reduce method could be omitted here, since it merely writes
    // each key back out; see the sketch after this listing.

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(soft.class);
        job.setMapperClass(map1.class);
        job.setReducerClass(reduce1.class);
        job.setMapOutputKeyClass(InPair.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(InPair.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("E:\\data1\\ceshi.txt"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\data1\\out"));
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
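As noted above, the pass-through reduce1 isn't strictly necessary: Hadoop's base Reducer class is an identity reducer that emits every key/value pair unchanged, and it is also the default when no reducer class is set. Below is a minimal sketch of an alternative driver, assuming the same InPair and map1 classes; the class name SoftNoReducer and the args-based paths are my own placeholders:

package com.soft;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SoftNoReducer {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(SoftNoReducer.class);
        job.setMapperClass(soft.map1.class);
        // The base Reducer writes each (key, value) through unchanged,
        // which is what reduce1 did by hand.
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(InPair.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(InPair.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

One subtle difference to keep in mind: reduce1 writes each distinct key once, so duplicate input pairs collapse into a single output line, while the identity Reducer emits one line per occurrence. If the input has no duplicate pairs, the two versions produce the same sorted output.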