1.对象数据的处理&2.json数据处理&3.提取网站并计算流量&4.分组TopN&5.MapjoinTest

最新推荐文章于 2020-12-20 16:44:33 发布

好先生！！！

最新推荐文章于 2020-12-20 16:44:33 发布

阅读量243

点赞数

分类专栏： hadoop

本文链接：https://blog.csdn.net/qq_42796847/article/details/100139128

版权

hadoop 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

1.对象数据的处理:

在这里插入图片描述
(1)person类:

package com.mr.day05.test01;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class person implements Writable {

    private String name;

    private int age;

    private int score;

    private String sex;




    public person(String name, int age, int score, String sex) {
        this.name = name;
        this.age = age;
        this.score = score;
        this.sex = sex;
    }


    public person() {

    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public int getScore() {
        return score;
    }

    public void setScore(int score) {
        this.score = score;
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    @Override
    public void write(DataOutput out) throws IOException {
            out.writeUTF(this.name);
            out.writeInt(this.age);
            out.writeInt(this.score);
            out.writeUTF(this.sex);


    }

    @Override
    public void readFields(DataInput in) throws IOException {

        this.name = in.readUTF();
        this.age = in.readInt();
        this.score = in.readInt();
        this.sex = in.readUTF();

    }

    @Override
    public String toString() {
        return "person{" +
                "name='" + name + '\'' +
                ", age=" + age +
                ", score=" + score +
                ", sex='" + sex + '\'' +
                '}';
    }
}

(2)

package com.mr.day05.test01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class onlymap2object01 {
    public  static   class  map01 extends Mapper<LongWritable, Text, person, NullWritable> {
        person persons;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
           persons=new person();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {





           if (key.get()!=0){
               //String[] vsplit = value.toString().split(" ");
               String vsplits=new String(value.getBytes(),0,value.getLength(),"GBK");
               String[] vsplit= vsplits.split(",");

               if (vsplit.length>=4){
                   persons.setName(vsplit[0]);
                   persons.setAge(Integer.valueOf(vsplit[1]));
                   persons.setScore(Integer.valueOf(vsplit[2]));



                   persons.setSex(vsplit[3]);


                   context.write(persons,NullWritable.get());

               }




           }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");

        Job job = Job.getInstance(conf);

        job.setJarByClass(onlymap2object01.class);

        job.setMapperClass(map01.class);


        job.setNumReduceTasks(0);

        //指定map和reduce输出数据的类型
        job.setMapOutputKeyClass(person.class);
        job.setMapOutputValueClass(NullWritable.class);






        FileInputFormat.setInputPaths(job,new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\01-对象数据处理\\input01"));

        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\01-对象数据处理\\output01");


        if (fs.exists(outPath)){
            fs.delete(outPath,true);
        }

        FileOutputFormat.setOutputPath(job,outPath);

        job.submit();
    }

}

结果图:

在这里插入图片描述

2.json数据处理

在这里插入图片描述

package com.mr.day05.test05;

public class Phone {
        private long uid;
        private long phone;

        private String addr;

    public long getUid() {
        return uid;
    }

    public void setUid(long uid) {
        this.uid = uid;
    }

    public long getPhone() {
        return phone;
    }

    public void setPhone(long phone) {
        this.phone = phone;
    }

    public String getAddr() {
        return addr;
    }

    public void setAddr(String addr) {
        this.addr = addr;
    }


    public Phone(long uid, long phone) {
        this.uid = uid;
        this.phone = phone;
    }

    public Phone() {

    }


    @Override
    public String toString() {
        return "Phone{" +
                "uid=" + uid +
                ", phone=" + phone +
                ", addr='" + addr + '\'' +
                '}';
    }
}

package com.mr.day05.test05;



import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.IOException;

public class ReadJson2Object {
    public static class ReadJson2ObjectMapper extends Mapper<LongWritable, Text,Phone, NullWritable> {

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line=new String(value.getBytes(),0,value.getLength(),"GBK");
            ObjectMapper objectMapper = new ObjectMapper();
            Phone phone = objectMapper.readValue(line, Phone.class);
            context.write(phone,NullWritable.get());
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(ReadJson2Object.class);

        job.setMapperClass(ReadJson2ObjectMapper.class);

        job.setOutputKeyClass(Phone.class);
        job.setOutputValueClass(NullWritable.class);

        job.setNumReduceTasks(0);

        FileInputFormat.setInputPaths(job,new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\05-json数据处理\\input01"));

        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\05-json数据处理\\output01");
        if (fs.exists(outPath)){
            fs.delete(outPath,true);
        }

        FileOutputFormat.setOutputPath(job,outPath);

        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);

    }

}

结果图:

在这里插入图片描述

3.提取网站并计算流量

在这里插入图片描述

package com.mr.day05.test02;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mrtest02 {

    public  static   class  map01 extends Mapper<LongWritable, Text,  Text,Text> {




        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        if (key.get()!=0){
            String[] split1 = value.toString().split("//");
            String[] split2 = split1[1].split("/");
            String[] name = split2[0].split("[.]");
            String[] fls = value.toString().split(" ");
            String fl = fls[2];
            String f2 = fls[3];


            context.write(new Text(name[name.length-2]),new Text(fl+" "+f2));



        }



        }

    }


    public  static  class  reduce01 extends Reducer<Text,Text,Text,Text> {



        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {



            int ssum=0;
            int xsum=0;
             int zsum=0;
            for (Text unm: values){
                String[] s = unm.toString().split(" ");

                ssum+=Integer.valueOf(s[0]);
                xsum+=Integer.valueOf(s[1]);

            }
            zsum=ssum+xsum;
            context.write(key,new Text(ssum+","+xsum+","+zsum) );

        }






    }


    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");

        Job job = Job.getInstance(conf);

        job.setJarByClass(mrtest02.class);

        job.setMapperClass(map01.class);
        job.setReducerClass(reduce01.class);


        //指定map和reduce输出数据的类型
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);


        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);



        FileInputFormat.setInputPaths(job,new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\02-提取网站并计算流量\\input01"));

        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\02-提取网站并计算流量\\output01");


        if (fs.exists(outPath)){
            fs.delete(outPath,true);
        }

        FileOutputFormat.setOutputPath(job,outPath);

        job.submit();
    }



}

结果图:

在这里插入图片描述

4.分组TopN:

在这里插入图片描述
(1.Student类继承WritableComparable自定义排序)

package com.mr.day05.test03;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Student implements WritableComparable<Student> {
    private String name;
    private String subject;
    private double avescore;


    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getSubject() {
        return subject;
    }

    public void setSubject(String subject) {
        this.subject = subject;
    }

    public double getAvescore() {
        return avescore;
    }

    public void setAvescore(double avescore) {
        this.avescore = avescore;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.name);
        dataOutput.writeUTF(this.subject);
        dataOutput.writeDouble(this.avescore);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.name=dataInput.readUTF();
        this.subject=dataInput.readUTF();
        this.avescore=dataInput.readDouble();

    }



    @Override
    public String toString() {
        return "Student{" +
                "name='" + name + '\'' +
                ", subject='" + subject + '\'' +
                ", avescore=" + avescore +
                '}';
    }

    @Override
    public int compareTo(Student s) {
      if (s.getAvescore()==this.getAvescore()){
          return s.getName().compareTo(this.getName());

      }else {
          return s.getAvescore() >this.getAvescore() ? 1:-1;
      }
    }

}

(2.重写Partitioner定义分区)

package com.mr.day05.test03;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

import java.util.*;

public class SubjectPadrtitioner extends Partitioner <Student, NullWritable>{
        // Map<String,Integer>  map=new HashMap<String,Integer>();

        List<String> list=new ArrayList<>();

    @Override
    public int getPartition(Student student, NullWritable nullWritable, int numPartitions) {
       /* map.put("computer",0);
        map.put("english",1);
        map.put("algorithm",2);
        map.put("math",3);
        return map.get(student.getSubject());*/

        System.out.println(student);
       if (list.contains(student.getSubject())){

           return list.indexOf(student.getSubject());

       }else {
           list.add(student.getSubject());
           return list.indexOf(student.getSubject());
       }

    }
}

(3)

package com.mr.day05.test03;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;


import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mrtest3 {
    public  static   class  map03 extends Mapper<LongWritable, Text,Student, NullWritable> {




        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {


                String[] vsplit = value.toString().split(",");
            if (vsplit.length>=3){
                Student student = new Student();
                Double sum=0.0;
                int num = vsplit.length - 2;
                for (int i=2;i<vsplit.length;i++){

                    sum+=Double.valueOf(vsplit[i]);

                }
               Double avescore=sum/num;


                student.setSubject(vsplit[0]);
                student.setName(vsplit[1]);
                student.setAvescore(avescore);

                //System.out.println(student);
                context.write(student,NullWritable.get());


            }


        }
    }


    public static class Reduce03 extends Reducer<Student,NullWritable,Student, NullWritable> {

        protected void reduce(Student key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            //System.out.println(key);

           context.write(key,NullWritable.get());
        }



    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://master:9000");

        Job job = Job.getInstance(conf);

        job.setJarByClass(mrtest3.class);

        job.setMapperClass(map03.class);
        job.setReducerClass(Reduce03.class);

        job.setNumReduceTasks(4);
        job.setPartitionerClass(SubjectPadrtitioner.class);
        //指定map和reduce输出数据的类型
        job.setMapOutputKeyClass(Student.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);



        FileInputFormat.setInputPaths(job,new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\03-分组TopN\\input01"));

        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\03-分组TopN\\output01");


        if (fs.exists(outPath)){
            fs.delete(outPath,true);
        }

        FileOutputFormat.setOutputPath(job,outPath);

        job.submit();
    }

}

结果图:

1
在这里插入图片描述
2

3

4

5.MapjoinTest

小表:
在这里插入图片描述
大表(小连大)

package com.mr.day05.test04;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class MapJoinTest04 {

    public static class MapJoinTestMapper extends Mapper<LongWritable, Text,Text, NullWritable> {
        Map<String,String> goodsMap = new HashMap<String,String>();

        protected void setup(Context context) throws IOException, InterruptedException {

            BufferedReader br = new BufferedReader(new FileReader("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\04-MapjoinTest\\input01\\小表\\b.txt"));
            String line = "";
            while (StringUtils.isNotBlank(line=br.readLine())){
                String[] split = line.split(",");
                if (split.length>=2){
                    goodsMap.put(split[0],split[1]);
                }
            }
        }

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String vsplits=new String(value.getBytes(),0,value.getLength(),"GBK");
            String[] split = vsplits.split(",");

            if (split.length>=2){
                String goodInfo = goodsMap.get(split[0]);
                context.write(new Text(split[0]+ ","+split[1]+","+goodInfo),NullWritable.get());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(MapJoinTest04.class);

        job.setMapperClass(MapJoinTestMapper.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0);

//        job.addCacheFile(new URI("hdfs://B01master:9000/join/cache/user.txt"));

        FileInputFormat.setInputPaths(job,new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\04-MapjoinTest\\input01\\inout01"));

        FileSystem fs = FileSystem.get(conf);
        Path outPath = new Path("D:\\hadoop\\2019.08.28-05mapReduce的各种案例\\Test\\04-MapjoinTest\\input01\\outout01");
        if (fs.exists(outPath)){
            fs.delete(outPath,true);
        }

        FileOutputFormat.setOutputPath(job,outPath);
        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);
    }
}

结果图:

在这里插入图片描述

好先生！！！

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
1.对象数据的处理&2.json数据处理&3.提取网站并计算流量&4.分组TopN&5.MapjoinTest

1.对象数据的处理:(1)person类:package com.mr.day05.test01;import org.apache.hadoop.io.Writable;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;public class person imple...
复制链接

扫一扫