package MRDemo;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Finds provinces that have NO farm-produce market, via a reduce-side join
 * over two tagged inputs:
 *   1) a list of all provinces            — tagged "a_" by {@link AllProvinceMap}
 *   2) market records (province in col 5) — tagged "y_" by {@link MarketProvinceMap}
 * A province key that never receives a "y_"-tagged value has no market.
 */
public class NotMarketProvince {

    public static void main(String[] args) throws Exception {
        // Usage: NotMarketProvince <allProvincesInput> <marketRecordsInput> <output>
        if (args.length != 3) {
            // Exit non-zero with a usage message; the original exited 0 silently,
            // which reports "success" to the shell even though nothing ran.
            System.err.println("Usage: NotMarketProvince <provinceInput> <marketInput> <output>");
            System.exit(1);
        }
        // Job.getInstance(...) replaces the deprecated new Job(Configuration, String).
        Job job = Job.getInstance(new Configuration(), "NotMarketProvince");
        job.setJarByClass(NotMarketProvince.class);
        // Two input paths, each with its own mapper — hence MultipleInputs.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, AllProvinceMap.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MarketProvinceMap.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.setReducerClass(ProvinceReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Propagate job success/failure to the process exit code
        // (the original ignored waitForCompletion's boolean result).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * First mapper: every input line is a province name.
     * Emits {@code <province, "a_" + province>}, e.g. {@code <北京, a_北京>},
     * so the reducer can distinguish this side of the join by its tag.
     */
    public static class AllProvinceMap extends Mapper<LongWritable, Text, Text, Text> {
        /** Tag marking "known province" records. */
        public static final String LABEL = "a_";

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            context.write(value, new Text(LABEL + value.toString()));
        }
    }

    /**
     * Second mapper: tab-separated market records; column 5 (index 4) is the
     * province. Emits {@code <province, "y_" + record>}.
     */
    public static class MarketProvinceMap extends Mapper<LongWritable, Text, Text, Text> {
        /** Tag marking "province has a market" records. */
        public static final String LABEL = "y_";

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length == 6) { // skip malformed rows
                String province = fields[4].trim();
                // BUG FIX: the original wrote LABEL + pros, concatenating the
                // array's Object.toString() ("y_[Ljava.lang.String;@...") rather
                // than the record. Emit the original line so the value is usable.
                context.write(new Text(province), new Text(LABEL + value.toString()));
            }
        }
    }

    /**
     * Reducer: after the shuffle each province key carries the tagged values
     * from both mappers, e.g. {@code <北京, {y_..., a_北京}>} or {@code <香港, {a_香港}>}.
     * A province with no "y_"-tagged value has no farm-produce market, so it
     * is written to the output (with an empty value).
     */
    public static class ProvinceReduce extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws java.io.IOException, InterruptedException {
            boolean hasMarket = false;
            for (Text value : values) {
                if (value.toString().startsWith(MarketProvinceMap.LABEL)) {
                    hasMarket = true;
                    break; // one market record is enough; stop scanning
                }
            }
            if (!hasMarket) {
                context.write(key, new Text(""));
            }
        }
    }
}
// Source article: "MapReduce algorithm 5: map join with tags" (latest related post published 2023-11-05 14:45:22)