练习题如下:需要输出每个顾客的订单详情。
涉及到的文件:
Customers
1,Stephanie Leung,555-555-5555
2,Edward Kim,123-456-7890
3,Jose Madrize,281-330-8004
4,Davia Stork,408-555-0000
Orders
3,A,12.95,02-Jun-2008
1,B,88.25,20-May-2008
2,C,32.00,30-Nov-2007
3,D,25.02,22-Jan-2009
具体程序如下:
package Self;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;
import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;
import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class DataJoinTest extends Configured implements Tool {
/**
 * Map side of the join: wraps every input line in a {@link TaggedValue},
 * stamps it with the tag of the file it came from, and keys it by the text
 * before the first comma.
 */
public static class MapperClass extends DataJoinMapperBase {

    /**
     * Derives the join key from a tagged record.
     *
     * @param value the tagged record built by {@link #generateTaggedMapOutput(Object)}
     * @return the text before the first comma of the record's payload
     */
    @Override
    protected Text generateGroupKey(TaggedMapOutput value) {
        // The line itself is the payload. TaggedValue declares no toString(),
        // so the key must be read through getData() rather than from the wrapper.
        String line = value.getData().toString();
        // A limit of 2 keeps element 0 present even for lines made only of
        // commas, where a plain split(",") would return an empty array.
        String[] fields = line.split(",", 2);
        return new Text(fields[0]);
    }

    /**
     * Builds the source tag for an input.
     *
     * @param inputFile the input file string supplied by the base mapper
     * @return that string, unchanged, wrapped in a {@link Text}
     */
    @Override
    protected Text generateInputTag(String inputFile) {
        return new Text(inputFile);
    }

    /**
     * Wraps one input value in a {@link TaggedValue} carrying this mapper's input tag.
     *
     * @param object the input value; cast to {@link Text} (the job reads its
     *               input with TextInputFormat)
     * @return the tagged record
     */
    @Override
    protected TaggedMapOutput generateTaggedMapOutput(Object object) {
        TaggedValue tagged = new TaggedValue((Text) object);
        tagged.setTag(this.inputTag);
        return tagged;
    }
}
/**
 * Writable record that pairs one payload with the tag inherited from
 * {@link TaggedMapOutput}. Serialized as the payload followed by the tag.
 */
public static class TaggedValue extends TaggedMapOutput {
    private Writable data;

    /**
     * No-argument constructor, needed so an instance can be created reflectively
     * and then filled in by {@link #readFields(DataInput)}.
     * The payload is pre-allocated as an empty {@link Text} because Text is the
     * only payload type this job wraps; without it readFields would dereference
     * a null payload.
     */
    public TaggedValue() {
        this(new Text());
    }

    /**
     * Wraps the given payload with an empty tag.
     *
     * @param data the payload to carry
     */
    public TaggedValue(Writable data) {
        this.data = data;
        this.tag = new Text("");
    }

    /**
     * Reads the payload and then the tag, mirroring {@link #write(DataOutput)}.
     *
     * @param dataInput the stream to read from
     * @throws IOException if either field cannot be read
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.data.readFields(dataInput);
        this.tag.readFields(dataInput);
    }

    /**
     * Writes the payload and then the tag.
     *
     * @param out the stream to write to
     * @throws IOException if either field cannot be written
     */
    @Override
    public void write(DataOutput out) throws IOException {
        this.data.write(out);
        this.tag.write(out);
    }

    /**
     * @return the wrapped payload
     */
    @Override
    public Writable getData() {
        return this.data;
    }
}
/**
 * Reduce side of the join: merges one combination of records that share a
 * group key into a single comma-separated record.
 */
public static class ReducerClass extends DataJoinReducerBase {

    /**
     * Joins the given records into one output record.
     *
     * @param tags   the source tags of the records being combined
     * @param values the records, each a {@link TaggedMapOutput}
     * @return a {@link TaggedValue} whose payload is every record's text after
     *         its first comma, joined with ",", tagged with {@code tags[0]};
     *         {@code null} when fewer than two tags are present
     */
    @Override
    protected TaggedMapOutput combine(Object[] tags, Object[] values) {
        // Fewer than two sources: nothing to join.
        // Presumably the base reducer drops null results, making this an inner join (verify).
        if (tags.length < 2) {
            return null;
        }

        StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < values.length; idx++) {
            if (idx > 0) {
                joined.append(",");
            }
            TaggedMapOutput record = (TaggedMapOutput) values[idx];
            String payload = record.getData().toString();
            // Drop the leading key field; keep the rest of the line intact.
            String[] keyAndRest = payload.split(",", 2);
            joined.append(keyAndRest[1]);
        }

        TaggedValue result = new TaggedValue(new Text(joined.toString()));
        result.setTag((Text) tags[0]);
        return result;
    }
}
/**
 * Configures and runs the join job.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output path
 * @return 0 after the job has run; 2 if fewer than two arguments were supplied
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 2) {
        System.err.println("Usage: DataJoinTest <input path> <output path>");
        return 2;
    }

    Configuration configuration = getConf();
    JobConf job = new JobConf(configuration, DataJoinTest.class);
    job.setJobName("DataJoinTest");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(MapperClass.class);
    job.setReducerClass(ReducerClass.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TaggedValue.class);
    // Separate key and value with a comma in the text output.
    job.set("mapred.textoutputformat.separator", ",");

    JobClient.runJob(job);
    return 0;
}
/**
 * Command-line entry point: runs the job through {@link ToolRunner} and exits
 * with the status it returns.
 *
 * @param args command-line arguments forwarded to {@link #run(String[])}
 * @throws Exception if the tool fails
 */
public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new DataJoinTest(), args));
}
}