Hadoop MapReduce Single-Table Join Program
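
This program self-joins a single child-parent table to find grandchild-grandparent pairs: the mapper emits every pair twice, once keyed by the child (right table) and once keyed by the parent (left table), and the reducer takes the Cartesian product of the two sides within each key. A minimal sketch of the expected input and output, assuming a space-separated file with a "child parent" header line (Tom, Lucy and Mary are made-up sample names):

Input:
child parent
Tom Lucy
Lucy Mary

Output (fields are tab-separated by the default TextOutputFormat):
grandchild grandparent
Tom Mary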

package com.hadoop.sample;  
  
import java.io.IOException;  
import java.util.Iterator;  
  
import org.apache.hadoop.conf.Configuration;  
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.Mapper;  
import org.apache.hadoop.mapreduce.Reducer;  
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
import org.apache.hadoop.util.GenericOptionsParser;  
  
public class STJoin {  
    private static int time = 0;  
    //The mapper splits each input line into child and parent, emits the pair once in its
    //original order (keyed by child) as the right table, and once reversed (keyed by parent)
    //as the left table; a left/right-table flag is prepended to each value so the reducer can tell them apart.
    public static class Map extends Mapper<Object,Text,Text,Text>{  
  
        public void map(Object key,Text value,Context context) throws IOException,InterruptedException{  
            String childname = "";
            String parentname = "";
            String relationtype = "";
            String line = value.toString();  
            //locate the first space that separates child from parent
            int split = line.indexOf(' ');
            if(split < 0){
                return;//skip empty or malformed lines
            }
            String[] values = {line.substring(0,split),line.substring(split+1)};
            if(values[0].compareTo("child") != 0){//skip the header line "child parent"
                childname = values[0];  
                parentname = values[1];  
                relationtype = "1";//左右表区分标志  
                context.write(new Text(values[1]), new Text(relationtype + "+" +childname+"+"+parentname));//左表  
                relationtype = "2";  
                context.write(new Text(values[0]), new Text(relationtype + "+" +childname+"+"+parentname));//右表  
            }  
        }  
    }  
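    //After the shuffle, the reduce group for a person P contains left-table values
    //("1+child+parent" records where P is the parent, so the child is a grandchild candidate)
    //and right-table values ("2+child+parent" records where P is the child, so the parent is
    //a grandparent candidate); the Cartesian product of the two lists yields the joined pairs.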
    public static class Reduce extends Reducer<Text,Text,Text,Text>{  
        public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException{  
            if(time == 0){//emit the header row once (assumes a single reduce task)
                context.write(new Text("grandchild"),new Text("grandparent"));  
                time++;  
            }  
            int grandchildnum = 0;
            String[] grandchild = new String[10];//assumes at most 10 children per key
            int grandparentnum = 0;
            String[] grandparent = new String[10];//assumes at most 10 parents per key
            Iterator<Text> ite = values.iterator();
            while(ite.hasNext()){  
                String record = ite.next().toString();  
                int len = record.length();  
                int i = 2;//skip the flag character and the first '+'
                if(len == 0){  
                    continue;  
                }  
                char relationtype = record.charAt(0);  
                String childname = new String();  
                String parentname = new String();  
                //extract the child from this value
                while(record.charAt(i) != '+'){  
                    childname = childname + record.charAt(i);  
                    i++;  
                }  
                i = i+1;  
                //extract the parent from this value
                while(i<len){  
                    parentname = parentname + record.charAt(i);  
                    i++;  
                }  
                //left table: collect the child as a grandchild candidate
                if(relationtype == '1'){  
                    grandchild[grandchildnum]=childname;  
                    grandchildnum++;  
                }else{//right table: collect the parent as a grandparent candidate
                    grandparent[grandparentnum]=parentname;  
                    grandparentnum++;  
                }  
                  
            }  
            //Cartesian product of the grandchild and grandparent arrays
            if(grandparentnum!=0&&grandchildnum!=0){  
                for(int m=0;m<grandchildnum;m++){  
                    for(int n=0;n<grandparentnum;n++){  
                        context.write(new Text(grandchild[m]), new Text(grandparent[n]));  
                    }  
                }  
            }  
        }  
    }  
    /** 
     * @param args 
     */  
    public static void main(String[] args) throws Exception{  
        Configuration conf = new Configuration();  
        String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();  
        if(otherArgs.length != 2){  
            System.err.println("Usage: STJoin <in> <out>");
            System.exit(2);  
        }  
        Job job = new Job(conf,"single table join");  
        job.setJarByClass(STJoin.class);  
        job.setMapperClass(Map.class);  
        //no combiner: running Reduce as a combiner would emit already-joined pairs that the reducer cannot parse
        job.setReducerClass(Reduce.class);  
        job.setOutputKeyClass(Text.class);  
        job.setOutputValueClass(Text.class);  
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));  
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));  
        System.exit(job.waitForCompletion(true) ? 0 : 1);  
    }  
  
} 
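
A typical way to run the job (the jar name and HDFS paths below are only placeholders; the program takes exactly two arguments, an input directory and an output directory that must not already exist):

hadoop jar stjoin.jar com.hadoop.sample.STJoin /user/hadoop/stjoin/input /user/hadoop/stjoin/output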
