address.txt（每行格式：id<TAB>城市名，以制表符分隔）:
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian
factory.txt（每行格式：工厂名<TAB>id，以制表符分隔）:
Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1
思路分析:
以id作为key，把“文件名-名称-id”作为value（address.txt中的名称是城市名，factory.txt中的名称是工厂名）
mapper输出（按key分组后，即reducer收到的输入）:
1 address.txt-Beijing-1,factory.txt-Beijing Red Star-1,factory.txt-Beijing Rising-1,factory.txt-Bank of Beijing-1
2 address.txt-Guangzhou-2,factory.txt-Guangzhou Honda-2,factory.txt-Guangzhou Development Bank-2
3 address.txt-Shenzhen-3,factory.txt-Shenzhen Thunder-3,factory.txt-Tencent-3
4 address.txt-Xian-4
代码如下（打成jar包后在Linux上运行）:
package com.alibaba.hdfs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Gong {
public static class MRMapper extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
FileSplit fileSplit=(FileSplit) context.getInputSplit();
String name = fileSplit.getPath().getName();
String[] split = value.toString().split("\t");
if (name.equals("address.txt")) {
context.write(new Text(split[0]), new Text(name+"-"+split[1]+"-"+split[0]));
}
if (name.equals("factory.txt")) {
context.write(new Text(split[1]), new Text(name+"-"+split[0]+"-"+split[1]));
}
}
}
public static class MRReducer extends Reducer<Text, Text, Text, Text>{
@Override
protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String factory_place="";
String address_place="";
int factory_id=0;
int address_id=0;
for (Text t : values) {
String[] split = t.toString().split("-");
String name = split[0];
if(name.equals("address.txt")){
address_place=split[1];
address_id=Integer.parseInt(split[2]);
}
if(name.equals("factory.txt")){
factory_place=split[1];
factory_id=Integer.parseInt(split[2]);
}
//若两个文件的id一致 就输出对应的结果
if (factory_id==address_id) {
context.write(new Text(factory_place), new Text(address_place));
}
}
}
}
public static void main(String[] args)throws Exception {
//创建任务配置对象
Configuration cfg = new Configuration();
//个性化任务
Job job = Job.getInstance(cfg);
//指定jar包要执行的main方法所在的类
job.setJarByClass(Gong.class);
//指定要计算的文件输入路径
FileInputFormat.setInputPaths(job, new Path(args[0]));
//设置mapper类
job.setMapperClass(MRMapper.class);
//设置mapper输出的key跟value的类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//设置reducer所在的类
job.setReducerClass(MRReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//提交任务
job.waitForCompletion(true);
}
}