多表关联和单表关联类似,它也是通过对原始数据进行一定的处理,从其中挖掘出关心的信息
1 实例描述
输入是两个文件,一个代表工厂表,包含工厂名列和地址编号列;另一个代表地址表,包含地址名列和地址编号列。要求从输入数据中找出工厂名和地址名的对应关系,输出"工厂名——地址名"表
地址表
addressID addressname
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian
工厂表
factoryname addressID
Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Back of Beijing 1
期望输出:
factoryname addressname
Back of Beijing Beijing
Beijing Red Star Beijing
Beijing Rising Beijing
Guangzhou Development Bank Guangzhou
Guangzhou Honda Guangzhou
Shenzhen Thunder Shenzhen
Tencent Shenzhen
2 问题分析
多表关联和单表关联相似,都类似于数据库中的自然连接。相比单表关联,多表关联的左右表和连接列更加清楚。所以可以采用和单表关联的相同的处理方式,map识别出输入的行属于哪个表之后,对其进行分割,将连接的列值保存在key中,另一列和左右表标识保存在value中,然后输出。reduce拿到连接结果之后,解析value内容,根据标志将左右表内容分开存放,然后求笛卡尔积,最后直接输出。
3 代码
package mr;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MyFactory {
static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
public void map(LongWritable k1, Text v1, Context context)
throws java.io.IOException, java.lang.InterruptedException
{
int i = 0;
String[] lines= v1.toString().split(",");
if((lines.length != 2 && lines[0].equals("factoryname") )||
(lines.length != 2 && lines[0].equals("addressID")))return;
if(lines[0].charAt(0) >= '0' && lines[0].charAt(0) <= '9')i++;
if(i < 1){
String f_name = lines[0];
String f_address = lines[1];
context.write(new Text(f_address), new Text("0"+","+f_name+","+f_address));
System.out.println(f_address+f_name);
}
else{
String a_id = lines[0];
String a_name = lines[1];
context.write(new Text(a_id), new Text("1"+","+a_name+","+a_id));
}
System.out.println("map......");
}
}
static class MyReduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key, Iterable<Text> values, Context context) throws java.io.IOException, java.lang.InterruptedException
{
List<String> f_name = new ArrayList();
List<String> a_name = new ArrayList();
Iterator<Text> it = values.iterator();
while(it.hasNext()){
String line = it.next().toString();
System.out.println(key+" "+line);
String[] words = line.split(",");
if(words[0].equals("0")){
f_name.add(words[1]);
}
else if(words[0].equals("1")){
a_name.add(words[1]);
}
else return;
}
System.out.println("reduce......");
for (String a : a_name) {
for (String f : f_name) {
context.write(new Text(f), new Text(a));
}
}
}
}
private static String INPUT_PATH="hdfs://master:9000/input/factory.dat";
private static String INPUT_PATH1="hdfs://master:9000/input/address.dat";
private static String OUTPUT_PATH="hdfs://master:9000/output/c/";
public static void main(String[] args) throws Exception {
Configuration conf=new Configuration();
FileSystem fs=FileSystem.get(new URI(OUTPUT_PATH),conf);
if(fs.exists(new Path(OUTPUT_PATH)))
fs.delete(new Path(OUTPUT_PATH));
Job job=new Job(conf,"myjob");
job.setJarByClass(MyGL.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job,new Path(INPUT_PATH));
FileInputFormat.addInputPath(job,new Path(INPUT_PATH1));
FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
job.waitForCompletion(true);
}
}