给定两个文件file1.txt和file2.txt
file1中:
青岛
淄博
烟台
德州
济南
滨州
莱芜
潍坊
file2中:
济南 济南机场
青岛 青岛机场
潍坊 潍坊机场
烟台 烟台机场
这样的话没有飞机场的城市就是淄博、德州、滨州、莱芜
上代码:
Mapper:
package airport_choose;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import java.io.IOException;
public class airportMapper extends Mapper<LongWritable,Text,Text,Text> {
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
String line = value.toString();
String flag;
String st;
//System.out.print(line); //两个文件的内容都统计到line里面了,济南 济南机场青岛 青岛机场潍坊 潍坊机场烟台 烟台机场 青岛淄博烟台德州济南滨州莱芜潍坊
if(line.contains("机场")){
String str[]=line.split(" ");
st=str[0]; //这里是 济南 济南机场 str[0]是 济南
// System.out.println(st);
// System.out.println("**************");
flag="s_";
}else {
st=line;
// System.out.println(st);
flag="a_";
}
Text st1=new Text(st); //类型转换 String 转 Text
Text flag1=new Text(flag);
context.write(st1, flag1);
}
}
Reducer:
package airport_choose;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
public class airportReducer extends Reducer<Text,Text,Text,Text>{
protected void reduce(Text key,Iterable<Text> values,Context context)throws IOException, InterruptedException{
String st3="";
for( Text value:values) { //这里的value是flag1,是s_或者a_
//System.out.println(value); //这里是12个 a_ 或者s_
st3+=value.toString();
//System.out.println(st3); //这里st3是 a_s_ 因为济南他们有两个值有个a_和s_
}
//System.out.println(st3); //去重之后,济南他们对应两个值a_s_
int s=0;
if(st3.contains("s")) {
s=1;
}
if(s==0) {
context.write(key, null);
}
}
}
Runner:
package airport_choose;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver: configures and submits the airport-join job.
 *
 * Input:  D:\hadoop_practice\{airport/*}  (both file1.txt and file2.txt)
 * Output: D:\hadoop_practice\airport\output  (cities with no airport, one per line)
 */
public class airportRunner {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "airport_choose");
        // Without setJarByClass the job jar cannot be located when run on a cluster.
        job.setJarByClass(airportRunner.class);
        job.setMapperClass(airportMapper.class);
        job.setReducerClass(airportReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path outputPath = new Path("D:\\hadoop_practice\\airport\\output");
        // Delete the previous run's output first: Hadoop refuses to start
        // if the output directory already exists.
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputPath, true);

        FileInputFormat.setInputPaths(job, new Path("D:\\hadoop_practice\\{airport/*}"));
        FileOutputFormat.setOutputPath(job, outputPath);
        // Propagate job success/failure to the caller instead of ignoring it.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}