源码下载地址
- 文件01
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xi'an
- 文件02
factoryname addressId
BeiJing Red Star 1
ShenZhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1
- 希望结果
Bank of Beijing Beijing
Beijing Rising Beijing
BeiJing Red Star Beijing
Guangzhou Development Bank Guangzhou
Guangzhou Honda Guangzhou
Tencent Shenzhen
ShenZhen Thunder Shenzhen
- 执行代码
package com.bipt.model.wether;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that joins two kinds of tab-separated records on an address id
 * (a reduce-side inner join).
 *
 * <ul>
 *   <li>Address records: {@code <addressId>\t<addressName>} - the first column starts with an
 *       ASCII digit.</li>
 *   <li>Factory records: {@code <factoryName>\t<addressId>} - the first column does not start
 *       with an ASCII digit.</li>
 * </ul>
 *
 * <p>For every address id, each factory name is written together with each address name that
 * shares the id. Ids that have only factories or only addresses produce no output.
 *
 * <p>Usage: {@code MrCityId <in> <out>}
 */
public class MrCityId extends Configured implements Tool {

    /**
     * Tags every input record with its kind and re-keys it by address id.
     *
     * <p>Address records are emitted as {@code (addressId, "a" + addressName)}, factory records as
     * {@code (addressId, "f" + factoryName)}.
     */
    public static class ModelMap extends Mapper<LongWritable, Text, Text, Text> {
        // Output key/value holders, overwritten on every call to map().
        Text k = new Text();
        Text v = new Text();

        /**
         * Classifies one input line and emits it keyed by address id.
         *
         * <p>Lines containing the text {@code "address"} are treated as the header row and
         * skipped. Lines with fewer than two tab-separated columns, or with an empty first
         * column, are skipped instead of failing the task.
         *
         * <p>NOTE(review): the record kind is decided only by whether the first column starts
         * with a digit, so a factory whose name starts with a digit would be read as an address
         * record.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context used to emit the tagged record
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Skip the header row of the factory file.
            if (line.contains("address")) {
                return;
            }

            String[] split = line.split("\\t"); // tab-separated columns
            // Skip blank or malformed lines; indexing them would throw and kill the task.
            if (split.length < 2 || split[0].isEmpty()) {
                return;
            }

            // Does the first column start with an ASCII digit? (same semantics as regex \d)
            char first = split[0].charAt(0);
            if (first >= '0' && first <= '9') {
                // Address record: <addressId>\t<addressName>
                k.set(split[0]);
                v.set("a" + split[1]);
            } else {
                // Factory record: <factoryName>\t<addressId>
                k.set(split[1]);
                v.set("f" + split[0]);
            }
            context.write(k, v);
        }
    }

    /**
     * Joins the tagged records of one address id and emits
     * {@code (factoryName, addressName)} pairs.
     */
    public static class ModelReduce extends Reducer<Text, Text, Text, Text> {
        // Output key/value holders, overwritten before every write.
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits the values of one address id into address names and factory names using the
         * one-character tag added by the mapper, then writes every factory/address combination.
         *
         * @param key     the address id shared by all values
         * @param values  tagged values: {@code 'a'}-prefixed address names; anything else is
         *                treated as a factory name
         * @param context used to emit the joined records
         * @throws IOException          if writing an output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            ArrayList<String> add = new ArrayList<>();
            ArrayList<String> factory = new ArrayList<>();

            for (Text tagged : values) {
                String line = tagged.toString();
                char tag = line.charAt(0);          // record-kind tag
                String payload = line.substring(1); // the actual field value
                if (tag == 'a') { // address
                    add.add(payload);
                } else { // factory
                    factory.add(payload);
                }
            }

            // Cross product: nothing is written when either side is empty (inner join).
            for (String factoryName : factory) {
                k.set(factoryName);
                for (String addressName : add) {
                    v.set(addressName);
                    context.write(k, v);
                }
            }
        }
    }

    /**
     * Configures and runs the join job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     * @throws Exception if the job cannot be set up or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        // Validate arguments. Return the status instead of calling System.exit() so that a
        // caller embedding this Tool is not terminated; main() still exits with this code.
        if (args.length < 2) {
            System.err.println("error >>> <in> ....<out>");
            System.err.println("参数有问题");
            return 2;
        }

        // Job initialisation
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJobName("MrCityId"); // job name
        job.setJarByClass(MrCityId.class);

        // Mapper settings
        job.setMapperClass(ModelMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Reducer settings
        job.setReducerClass(ModelReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for it to finish
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits the JVM with the status returned by {@link #run}.
     *
     * @param args command-line arguments, passed through {@link ToolRunner}
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MrCityId(), args));
    }
}