package squencefile;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
// Single-table self-join: from <child, parent> pairs, derive <grandchild, grandparent> pairs
public class SingleJoin {
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split the line into fields
            String line = value.toString();
            String[] lineArr = line.split(" ");
            // Skip the header line
            if (!"child".equals(lineArr[0])) {
                // Emit <child, parent> tagged "1" (upward, toward the parent generation);
                // the tag must come before the colon so the reducer's split(":") can read it
                context.write(new Text(lineArr[0]), new Text("1:" + lineArr[1]));
                // Emit <parent, child> tagged "2" (downward, toward the child generation)
                context.write(new Text(lineArr[1]), new Text("2:" + lineArr[0]));
            }
        }
    }
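    // Example: the line "Tom Lucy" makes the mapper emit
    //   <Tom,  "1:Lucy">  (Lucy is Tom's parent)
    //   <Lucy, "2:Tom">   (Tom is Lucy's child)
    // so at Lucy's reduce call her parents (tag 1) meet her children (tag 2).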
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Local lists, so entries cannot leak across keys (as instance fields
            // they would accumulate over every reduce() call in the task)
            List<String> grandChildList = new ArrayList<>();
            List<String> grandParentList = new ArrayList<>();
            for (Text tempVal : values) {
                String tempValStr = tempVal.toString();
                String[] tempArr = tempValStr.split(":");
                if ("2".equals(tempArr[0])) {
                    // Tag 2: a child of the key, i.e. a grandchild candidate
                    grandChildList.add(tempArr[1]);
                } else if ("1".equals(tempArr[0])) {
                    // Tag 1: a parent of the key, i.e. a grandparent candidate
                    grandParentList.add(tempArr[1]);
                }
            }
            // Cross-join the two lists to get the <grandchild, grandparent> relation
            for (String grandParent : grandParentList) {
                for (String grandChild : grandChildList) {
                    context.write(new Text(grandChild), new Text(grandParent));
                }
            }
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Create the job configuration (the runtime environment)
        Configuration conf = new Configuration();
        // For cluster execution, point fs.defaultFS at the NameNode RPC address
        // (port 9000 or 8020 by convention; 8088 is the YARN web UI, not HDFS):
        // conf.set("fs.defaultFS", "hdfs://hadoop:9000");
        // Local execution:
        Job job = Job.getInstance(conf, "SingleJoin");
        // Entry point of the program (needed when running from a jar)
        job.setJarByClass(SingleJoin.class);
        // Input file
        FileInputFormat.addInputPath(job, new Path("F:\\filnk_package\\hadoop-2.10.1\\data\\test4\\file1.txt"));
        // Mapper logic
        job.setMapperClass(SingleJoin.MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Shuffle runs between map and reduce
        // Reducer logic
        job.setReducerClass(SingleJoin.MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Output directory (must not exist before the job runs)
        FileOutputFormat.setOutputPath(job, new Path("F:\\filnk_package\\hadoop-2.10.1\\data\\test4\\out"));
        // Run the job (submitted to YARN when running on a cluster)
        boolean result = job.waitForCompletion(true);
        System.out.print(result ? 1 : 0);
    }
}
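To submit the job to a cluster instead of running it locally, package the class into a jar and launch it with the hadoop CLI. A minimal sketch, assuming the jar is named single-join.jar (a placeholder) and the hard-coded F:\ paths above have been swapped for HDFS paths:

hadoop jar single-join.jar squencefile.SingleJoin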
Contents of file1.txt:
child parent
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Marry
Lucy Ben
Jack Jesse
Jack Alice
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma
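For reference, the twelve <grandchild, grandparent> pairs this input should produce (line order may vary, since it depends on how keys and values are grouped):

Tom Jesse
Jone Jesse
Tom Alice
Jone Alice
Tom Marry
Jone Marry
Tom Ben
Jone Ben
Philip Alice
Mark Alice
Philip Jesse
Mark Jesse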