将这两个文件中的内容进行合并整合,输出格式:
1001 date=20170710 name=apple num=2 price=5999 sum=11998
1002 date=20170710 name=xiaomi num=100 price=2999 sum=299900
文件1:order.txt
1001 20170710 4 2
1002 20170710 3 100
1003 20170710 2 40
1004 20170711 2 23
1005 20170823 4 55
1006 20170824 3 20
1007 20170825 2 3
1008 20170826 4 23
1009 20170912 2 10
1010 20170913 2 2
1011 20170914 3 14
1012 20170915 3 18
文件2:product.txt
1 chuizi 3999
2 huawei 3999
3 xiaomi 2999
4 apple 5999
// Writable --- serialization / deserialization
/**
 * Joined order/product record passed between the map and reduce phases.
 *
 * <p>An instance carries either the order-side fields (id, date, pid, num), the
 * product-side fields (pid, name, price), or both once the join has been done.
 * Fields are serialized by {@link #write(DataOutput)} and restored by
 * {@link #readFields(DataInput)} in the same order: id, date, pid, name, price, num.
 *
 * <p>String fields are never {@code null}: they default to the empty string and the
 * setters turn {@code null} into {@code ""}, because {@code writeUTF} cannot
 * serialize a {@code null} string.
 */
public class Item implements Writable {
    private int id;
    // Strings default to null, which cannot be serialized, so start from "".
    private String date = "";
    private String pid = "";
    private String name = "";
    private int price;
    private int num;

    /** Replaces {@code null} with the empty string so the value stays serializable. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    @Override
    public String toString() {
        return "Item [id=" + id + ", date=" + date + ", pid=" + pid + ", name=" + name + ", price=" + price + ", num="
                + num + "]";
    }

    /** @return the order id */
    public int getId() {
        return id;
    }

    /** @param id the order id */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the order date, never {@code null} */
    public String getDate() {
        return date;
    }

    /** @param date the order date; {@code null} is stored as {@code ""} */
    public void setDate(String date) {
        this.date = nullToEmpty(date);
    }

    /** @return the product id used as the join key, never {@code null} */
    public String getPid() {
        return pid;
    }

    /** @param pid the product id; {@code null} is stored as {@code ""} */
    public void setPid(String pid) {
        this.pid = nullToEmpty(pid);
    }

    /** @return the product name, never {@code null} */
    public String getName() {
        return name;
    }

    /** @param name the product name; {@code null} is stored as {@code ""} */
    public void setName(String name) {
        this.name = nullToEmpty(name);
    }

    /** @return the product price */
    public int getPrice() {
        return price;
    }

    /** @param price the product price */
    public void setPrice(int price) {
        this.price = price;
    }

    /** @return the ordered quantity */
    public int getNum() {
        return num;
    }

    /** @param num the ordered quantity */
    public void setNum(int num) {
        this.num = num;
    }

    /**
     * Restores all fields from {@code in}; the read order must mirror {@link #write(DataOutput)}.
     *
     * @param in source of the serialized fields
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.id = in.readInt();
        this.date = in.readUTF();
        this.pid = in.readUTF();
        this.name = in.readUTF();
        this.price = in.readInt();
        this.num = in.readInt();
    }

    /**
     * Serializes all fields to {@code out} in the order id, date, pid, name, price, num.
     *
     * @param out sink for the serialized fields
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(date);
        out.writeUTF(pid);
        out.writeUTF(name);
        out.writeInt(price);
        out.writeInt(num);
    }
}
Map
public class JoinMapper extends Mapper<LongWritable, Text, Text, Item> {
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] arr = line.split(" ");
Item it = new Item();
FileSplit fs = (FileSplit) context.getInputSplit();
String fileName = fs.getPath().getName();
if (fileName.startsWith("order")) {
it.setId(Integer.parseInt(arr[0]));
it.setDate(arr[1]);
it.setPid(arr[2]);
it.setNum(Integer.parseInt(arr[3]));
} else {
it.setPid(arr[0]);
it.setName(arr[1]);
it.setPrice(Integer.parseInt(arr[2]));
}
context.write(new Text(it.getPid()), it);
}
}
Reduce
public class JoinReducer extends Reducer<Text, Item, Item, NullWritable> {
public void reduce(Text _key, Iterable<Item> values, Context context) throws IOException, InterruptedException {
//找到这个包含名字和价格的这个对象
Item product = new Item();
List<Item> its = new ArrayList<>();
for (Item val : values) {
if (val.getPrice() != 0) {
product.setPid(val.getPid());
product.setName(val.getName());
product.setPrice(val.getPrice());
} else {
Item it = new Item();
it.setId(val.getId());
it.setDate(val.getDate());
it.setPid(val.getPid());
it.setNum(val.getNum());
its.add(it);
}
}
for (int i = 0; i < its.size(); i++) {
Item it = its.get(i);
it.setName(product.getName());
it.setPrice(product.getPrice());
context.write(it, NullWritable.get());
}
}
}
Driver
/**
 * Configures and submits the order/product join job.
 */
public class JoinDriver {
    private static final String DEFAULT_INPUT = "hdfs://10.42.87.122:9000/mr";
    private static final String DEFAULT_OUTPUT = "hdfs://10.42.87.122:9000/joinresult";

    /**
     * Runs the join job and waits for it to finish.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; each falls back to the built-in HDFS location when absent
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(cn.tedu.join.JoinDriver.class);
        job.setMapperClass(JoinMapper.class);
        job.setReducerClass(JoinReducer.class);

        // The map output types (Text, Item) differ from the job output types
        // (Item, NullWritable), so they must be declared separately. Calling
        // setOutputKeyClass/setOutputValueClass twice only keeps the last pair.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Item.class);
        job.setOutputKeyClass(Item.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        if (!job.waitForCompletion(true)) {
            // Report the failure to the calling shell or scheduler.
            System.exit(1);
        }
    }
}