1. WordCount
① The input is divided into splits; since the test files are small, each file forms one split, and each split is broken into lines to produce <key, value> pairs. The MapReduce framework does this automatically; the byte offset used as the key includes the characters taken up by line breaks (which differ between Windows and Linux).
② Each <key, value> pair is handed to the user-defined map method, which produces new <key, value> pairs.
③ Once the map output is collected, the Mapper sorts the pairs by key and runs the Combine step, summing the values that share a key; this is the Mapper's final output.
④ The Reducer first sorts the data it receives from the Mappers, then hands it to the user-defined reduce method, which produces the final <key, value> pairs that make up WordCount's output.
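A minimal sketch of the map and reduce methods that steps ①–④ describe (class names are illustrative, not from the original code; the imports are the same org.apache.hadoop ones used in the full listings below):

public static class WcTokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // step ②: emit <word, 1> for every word on the line
        for (String word : value.toString().split("\\s+")) {
            if (!word.isEmpty()) {
                context.write(new Text(word), ONE);
            }
        }
    }
}

public static class WcSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // steps ③/④: values arrive grouped by word; add up the counts
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}

Because summation is associative, the same reducer class can also be registered as the Combiner (job.setCombinerClass), which is the per-Mapper accumulation that step ③ refers to.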
package com.mr.day05.test03;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mrtest3 {
    public static class map03 extends Mapper<LongWritable, Text, Student, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each line: subject,name,score1,score2,...
            String[] vsplit = value.toString().split(",");
            if (vsplit.length >= 3) {
                Student student = new Student();
                double sum = 0.0;
                int num = vsplit.length - 2;  // number of score columns
                for (int i = 2; i < vsplit.length; i++) {
                    sum += Double.valueOf(vsplit[i]);
                }
                double avescore = sum / num;
                student.setSubject(vsplit[0]);
                student.setName(vsplit[1]);
                student.setAvescore(avescore);
                context.write(student, NullWritable.get());
            }
        }
    }

    public static class Reduce03 extends Reducer<Student, NullWritable, Student, NullWritable> {
        @Override
        protected void reduce(Student key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Keys arrive already sorted (per Student's compareTo); pass them through.
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");  // uncomment to run against HDFS instead of the local FS
        Job job = Job.getInstance(conf);
        job.setJarByClass(mrtest3.class);
        job.setMapperClass(map03.class);
        job.setReducerClass(Reduce03.class);
        job.setNumReduceTasks(4);
        job.setPartitionerClass(SubjectPadrtitioner.class);  // custom partitioner (defined elsewhere)
        // Specify the output key/value types of map and reduce
        job.setMapOutputKeyClass(Student.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("D:\\上课\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\03-分组TopN\\input01"));
        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\上课\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\03-分组TopN\\output01");
        // Delete the output directory if it already exists (MapReduce refuses to overwrite)
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        job.submit();
    }
}
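The listing above relies on a Student key class and a SubjectPadrtitioner that are not shown. A minimal sketch of what they would need to look like for the job to compile and run; the field set matches the setters called above, but the serialization order, sort order, and partition-by-subject logic are assumptions:

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Partitioner;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Hypothetical sketch: MapReduce keys must implement WritableComparable.
public class Student implements WritableComparable<Student> {
    private String subject = "";
    private String name = "";
    private double avescore;

    public void setSubject(String subject) { this.subject = subject; }
    public void setName(String name) { this.name = name; }
    public void setAvescore(double avescore) { this.avescore = avescore; }
    public String getSubject() { return subject; }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(subject);
        out.writeUTF(name);
        out.writeDouble(avescore);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        subject = in.readUTF();
        name = in.readUTF();
        avescore = in.readDouble();
    }

    @Override
    public int compareTo(Student o) {
        // group by subject, then rank by descending average score
        // (assumed, to match the "分组TopN" directory name in the input path)
        int c = subject.compareTo(o.subject);
        return c != 0 ? c : Double.compare(o.avescore, avescore);
    }

    @Override
    public String toString() {
        return subject + "\t" + name + "\t" + avescore;
    }
}

// Hypothetical sketch matching job.setPartitionerClass(SubjectPadrtitioner.class):
public class SubjectPadrtitioner extends Partitioner<Student, NullWritable> {
    @Override
    public int getPartition(Student key, NullWritable value, int numPartitions) {
        // send all records of one subject to the same reducer
        return (key.getSubject().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}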
Result screenshot:
2. Merging identical keys
package com.mr.day03.test02;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

// Java type -> Writable type:
//   long   -> LongWritable
//   String -> Text
//   int    -> IntWritable
public class sumTest {
    // KEYIN    byte offset of each line within the file
    // VALUEIN  the text of each line
    // KEYOUT   type of the keys the map side emits
    // VALUEOUT type of the values the map side emits
    public static class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // key     line offset
        // value   line content
        // context used to send the processed <key, value> pairs downstream
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Skip the first line of the file (byte offset 0), assumed to be a header
            if (key.get() != 0) {
                String[] fields = value.toString().split(",");
                String name = fields[0];
                int val = Integer.parseInt(fields[1]);
                context.write(new Text(name), new IntWritable(val));
            }
        }
    }

    // KEYIN    type of the keys the reduce side receives
    // VALUEIN  type of the values the reduce side receives
    // KEYOUT   type of the keys the reduce side emits
    // VALUEOUT type of the values the reduce side emits
    public static class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // key     one distinct key from the map output
        // values  all values that share that key
        // context used to send the reduced <key, value> pairs downstream
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");
        Job job = Job.getInstance(conf);
        job.setJarByClass(sumTest.class);
        job.setMapperClass(WcMapper.class);
        job.setReducerClass(WcReduce.class);
        // Specify the output key/value types of map and reduce
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path("D:\\上课\\hadoop\\2019.08.26-03mapReduce简单代码\\test\\02-合并相同\\input01"));
        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\上课\\hadoop\\2019.08.26-03mapReduce简单代码\\test\\02-合并相同\\output01");
        // Delete the output directory if it already exists
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        job.submit();
    }
}
Result screenshot:
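The screenshot itself is not reproduced here. A hypothetical run (the file contents below are invented for illustration; the offset check in the mapper assumes the first line is a header):

Assumed input (input01):
name,money
tom,100
jerry,200
tom,300

Output (output01/part-r-00000):
jerry	200
tom	400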
3. Common friends
(1)
package com.mr.day03.test03;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Test03 {
    public static class Mapper01 extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each line: person:friend1,friend2,...
            String[] split = value.toString().split(":");
            String skey = split[0];
            String[] svalue = split[1].split(",");
            // Invert the relation: emit <friend, person> so everyone who has
            // the same friend meets in one reduce call
            for (String val : svalue) {
                context.write(new Text(val), new Text(skey));
            }
        }
    }

    public static class Reduce01 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Concatenate everyone who counts this person as a friend
            StringBuilder sd = new StringBuilder();
            for (Text val : values) {
                sd.append(val).append(",");
            }
            context.write(key, new Text(sd.toString()));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");
        Job job = Job.getInstance(conf);
        job.setJarByClass(Test03.class);
        job.setMapperClass(Mapper01.class);
        job.setReducerClass(Reduce01.class);
        // Specify the output key/value types of map and reduce
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("D:\\上课\\hadoop\\2019.08.26-03mapReduce简单代码\\test\\03-共同好友\\inupt03"));
        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\上课\\hadoop\\2019.08.26-03mapReduce简单代码\\test\\03-共同好友\\outupt03");
        // Delete the output directory if it already exists
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        job.submit();
    }
}
After the first MR job:
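The intermediate screenshot is not reproduced here. With input in the usual person:friend1,friend2,... format (the records below are invented for illustration), job (1) inverts the relation so that each output line lists a person followed by everyone who counts them as a friend (the order of values within a line is not guaranteed, and each line carries the reducer's trailing comma):

Assumed input (inupt03):
A:B,C,D
B:A,C
C:A,B

Output of job (1) (outupt03):
A	B,C,
B	A,C,
C	A,B,
D	A,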
(2)
package com.mr.day03.test03;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Test04 {
    public static class Mapper02 extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each line from job (1): friend \t owner1,owner2,...
            String[] split = value.toString().split("\t");
            String keys = split[0];
            String[] sp = split[1].split(",");
            // Emit every pair of owners; order each pair lexicographically
            // (full-string comparison) so that "A-B" and "B-A" end up as the
            // same reduce key
            for (int i = 0; i < sp.length; i++) {
                String s1 = sp[i];
                for (int k = i + 1; k < sp.length; k++) {
                    String s2 = sp[k];
                    String pair = s1.compareTo(s2) < 0
                            ? s1 + "-" + s2
                            : s2 + "-" + s1;
                    context.write(new Text(pair), new Text(keys));
                }
            }
        }
    }

    public static class Reduce02 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Collect all friends this pair has in common
            StringBuilder sd = new StringBuilder();
            for (Text val : values) {
                sd.append(val).append(",");
            }
            context.write(key, new Text(sd.toString()));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");
        Job job = Job.getInstance(conf);
        job.setJarByClass(Test04.class);
        job.setMapperClass(Mapper02.class);
        job.setReducerClass(Reduce02.class);
        // Specify the output key/value types of map and reduce
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("D:\\上课\\hadoop\\2019.08.26-03mapReduce简单代码\\test\\03-共同好友\\outupt03"));
        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\上课\\hadoop\\2019.08.26-03mapReduce简单代码\\test\\03-共同好友\\outupt04");
        // Delete the output directory if it already exists
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        job.submit();
    }
}
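Continuing the invented records from job (1): the second mapper turns each "friend, owners" line into ordered owner pairs, and the reducer collects the friends each pair shares, so the final output (outupt04) would look like:

A-B	C,
A-C	B,
B-C	A,

read as "A and B have C as a common friend", and so on. The trailing commas come from the reducers' StringBuilder; they are harmless between the two jobs because Java's split(",") discards trailing empty strings, so job (2) never sees an empty owner name.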