Hadoop MapReduce多表关联程序

    package com.hadoop.sample;  
      
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.StringTokenizer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
      
    public class MTJoin {  
        private static int time = 0;  
        public static class Map extends Mapper<Object,Text,Text,Text>{  
            //在map中先区分输入行属于左表还是右表,然后对两列值进行分割,  
            //保存连接列在key值,剩余列和左右表标志在value中,最后输出  
            public void map(Object key,Text value,Context context) throws IOException,InterruptedException{  
                String line = value.toString();  
                int i = 0;  
                //输入文件首行,不处理  
                if(line.contains("factoryname")==true||line.contains("addressID")==true){  
                    return;  
                }  
                //找出数据中的分割点  
                while(line.charAt(i)>='9'||line.charAt(i)<='0'){  
                    i++;  
                }  
                if(line.charAt(i)>='9'||line.charAt(i)<='0'){  
                    //左表  
                    int j = i-1;  
                    while(line.charAt(j)!=' ') j--;  
                    String[] values = {line.substring(0, j),line.substring(i)};  
                    context.write(new Text(values[1]), new Text("1+"+values[0]));  
                }else{//右表  
                    int j = i+1;  
                    while(line.charAt(j)!=' ') j++;  
                    String[] values = {line.substring(0, i+1),line.substring(j)};  
                    context.write(new Text(values[0]), new Text("2+"+values[1]));  
                }  
            }  
        }  
        public static class Reduce extends Reducer<Text,Text,Text,Text>{  
            //reduce解析map输出,将value中数据按照左右表分别保存,然后求笛卡尔积,输出  
            public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException{  
                if(time == 0){//输入文件第一行  
                    context.write(new Text("factoryname"),new Text("addressname"));  
                    time++;  
                }  
                int factorynum = 0;  
                String factory[] = new String[10];  
                int adressnum = 0;  
                String adress[] = new String[10];  
                Iterator iter = values.iterator();  
                while(iter.hasNext()){  
                    String record = iter.next().toString();  
                    int len = record.length();  
                    int i = 2;  
                    char type = record.charAt(0);  
                    String factoryname = new String();  
                    String adressname = new String();  
                    if(type == '1'){//左表  
                        factory[factorynum] = record.substring(2);  
                        factorynum++;  
                    }else{//右表  
                        adress[adressnum] = record.substring(2);  
                    }  
                }  
                if(factorynum!=0&&adressnum!=0){//笛卡尔积  
                    for(int m=0;m<factorynum;m++){  
                        for(int n=0;n<adressnum;n++){  
                            context.write(new Text(factory[m]), new Text(adress[n]));  
                        }  
                    }  
                }  
            }  
        }  
        /** 
         * @param args 
         */  
        public static void main(String[] args) throws Exception{  
            // TODO Auto-generated method stub  
            Configuration conf = new Configuration();  
            String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();  
            if(otherArgs.length != 2){  
                System.err.println("Usage WordCount <int> <out>");  
                System.exit(2);  
            }  
            Job job = new Job(conf,"word count");  
            job.setJarByClass(MTJoin.class);  
            job.setMapperClass(Map.class);  
            job.setCombinerClass(Reduce.class);  
            job.setReducerClass(Reduce.class);  
            job.setOutputKeyClass(Text.class);  
            job.setOutputValueClass(Text.class);  
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));  
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));  
            System.exit(job.waitForCompletion(true) ? 0 : 1);  
        }  
      
    }  

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值