1. The example in the book "Hadoop in Action" (《Hadoop 实战》) does not run successfully as printed. Below is my version of the code, which does run:
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;
import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;
import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
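// A reduce-side (repartitioned) join implemented with the hadoop-datajoin
// contrib classes, which are written against the old org.apache.hadoop.mapred API.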
public class DataJoin extends Configured implements Tool{
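// Mapper side: each record is tagged with the name of the file it came from,
// and the first comma-separated field serves as the join (group) key.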
public static class DataJoinMapper extends DataJoinMapperBase {
@Override
protected Text generateGroupKey(TaggedMapOutput aRecord) {
return new Text(aRecord.getData().toString().split(",")[0]);
}
@Override
protected Text generateInputTag(String inputFile) {
return new Text(inputFile);
}
@Override
protected TaggedMapOutput generateTaggedMapOutput(Object value) {
TaggedMapOutput ret = new TaggedWritable((Text)value);
ret.setTag(this.inputTag);
return ret;
}
}
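// Writable wrapper that pairs a record with the tag of its source. The
// no-argument constructor must initialize both fields: Hadoop creates the
// instance reflectively during deserialization and immediately calls
// readFields(), which would otherwise hit a null field.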
public static class TaggedWritable extends TaggedMapOutput {
private Writable data;
public TaggedWritable() {
this.tag = new Text("");
this.data = new Text("");
}
public TaggedWritable(Writable data) {
this.tag = new Text("");
this.data = data;
}
@Override
public void write(DataOutput out) throws IOException {
this.tag.write(out);
this.data.write(out);
}
@Override
public void readFields(DataInput in) throws IOException {
// Fields must be read back in the same order write() serialized them:
// tag first, then data. Reading data first swaps the two Text fields.
this.tag.readFields(in);
this.data.readFields(in);
}
@Override
public Writable getData() {
return data;
}
}
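// Reducer side: combine() is called once for each cross-product combination
// of the grouped records, with one record per source tag. Returning null when
// a key appeared in fewer than two sources drops that key (inner-join semantics).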
public static class DataJoinReducer extends DataJoinReducerBase {
@Override
protected TaggedMapOutput combine(Object[] tags, Object[] values) {
if (tags.length < 2) {
return null;
}
StringBuilder joinedStr = new StringBuilder();
for (int i = 0; i < values.length; i++) {
if (i > 0) {
joinedStr.append(",");
}
TaggedWritable tw = (TaggedWritable)values[i];
String line = ((Text)tw.getData()).toString();
// Drop the join key (the first field); the framework already emits the
// group key as the output key, so it would otherwise appear twice.
String[] tokens = line.split(",", 2);
joinedStr.append(tokens[1]);
}
TaggedWritable ret = new TaggedWritable(new Text(joinedStr.toString()));
ret.setTag((Text)tags[0]);
return ret;
}
}
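// Job setup: both input directories feed the same mapper; the datajoin
// framework tells the records apart by their tags. The paths are hardcoded,
// so adjust them to your HDFS layout (the output directory must not exist yet).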
@Override
public int run(String[] args) throws Exception {
JobConf job = new JobConf(getConf());
job.setJarByClass(getClass());
job.setJobName("datajoin");
Path in1 = new Path("/join/customers/");
Path in2 = new Path("/join/orders/");
FileInputFormat.addInputPath(job, in1);
FileInputFormat.addInputPath(job, in2);
Path out = new Path("/join/output/");
FileOutputFormat.setOutputPath(job, out);
job.setMapperClass(DataJoinMapper.class);
job.setReducerClass(DataJoinReducer.class);
job.setInputFormat(TextInputFormat.class);
job.setOutputFormat(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(TaggedWritable.class);
job.set("mapred.textoutputformat.separator",",");
JobClient.runJob(job);
return 0;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new DataJoin(),args);
System.exit(res);
}
}
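To try it out (a minimal sketch; the file names, sample records, and jar names below are my assumptions, not part of the original post), place one file per source under the hardcoded input directories, with the join key as the first comma-separated field:

/join/customers/customers.txt
1,Stephanie Leung,555-555-5555
2,Edward Kim,123-456-7890

/join/orders/orders.txt
1,B,88.25,20-May-2008
2,C,32.00,30-Nov-2007

Compile against the hadoop-datajoin contrib jar and submit the job, making that jar available to the tasks as well, for example via -libjars:

hadoop jar datajoin-example.jar DataJoin -libjars hadoop-datajoin-1.0.4.jar

Each line of /join/output should then hold the join key followed by the non-key fields of the matching records, e.g.:

1,Stephanie Leung,555-555-5555,B,88.25,20-May-2008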
For reference:
http://www.cnblogs.com/aprilrain/archive/2013/01/28/2880460.html