1.自定义数据类型实现接口WritableComparable
2.输入数据格式:第一列为班级号,第二列为姓名,第三列为学号
1 小明 10 3 大明 11
1 小王 12 4 大王 13
1 小张 14 6 大张 15
3.输出数据格式:同一个班级的学生记录输出在同一行
1 学号:14姓名小张 学号:12姓名小王 学号:10姓名小明
3 学号:11姓名大明
4 学号:13姓名大王
6 学号:15姓名大张
4.定义User类
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
* @author DELL_pc
* @date 2017年3月15日
*/
/**
 * Custom Hadoop value type representing one student record.
 *
 * <p>Serialized field order is id, name, classid — {@link #write} and
 * {@link #readFields} must stay in lockstep or deserialization corrupts.
 *
 * <p>The ordering imposed by {@link #compareTo} is consistent with
 * {@link #equals}: classid first, then id, then name.
 */
public class User implements WritableComparable<User> {
    int id;       // student id (学号)
    String name;  // student name (姓名)
    int classid;  // class id (班级号), also used as the map output key

    /** No-arg constructor required by Hadoop's Writable reflection. */
    public User() {
    }

    /**
     * @param classid class id
     * @param name    student name
     * @param id      student id
     */
    public User(int classid, String name, int id) {
        super();
        this.id = id;
        this.name = name;
        this.classid = classid;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getClassid() {
        return classid;
    }

    public void setClassid(int classid) {
        this.classid = classid;
    }

    /** Serializes fields in the fixed order id, name, classid. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(name);
        out.writeInt(classid);
    }

    /** Deserializes fields in the same order they were written. */
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readInt();
        name = in.readUTF();
        classid = in.readInt();
    }

    /**
     * Total ordering consistent with equals: classid, then id, then name.
     *
     * <p>Fix: the previous implementation returned a constant 1, which
     * violates the Comparable contract (sgn(a.compareTo(b)) must equal
     * -sgn(b.compareTo(a))) and can make sorting/merging misbehave.
     * Integer.compare avoids the subtraction-overflow pitfall.
     */
    @Override
    public int compareTo(User o) {
        int c = Integer.compare(this.classid, o.classid);
        if (c != 0) {
            return c;
        }
        c = Integer.compare(this.id, o.id);
        if (c != 0) {
            return c;
        }
        // Null-safe name comparison: null sorts before any non-null name.
        if (this.name == null) {
            return (o.name == null) ? 0 : -1;
        }
        if (o.name == null) {
            return 1;
        }
        return this.name.compareTo(o.name);
    }

    @Override
    public String toString() {
        return "User [id=" + id + ", name=" + name + ", classid=" + classid + "]";
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + classid;
        result = prime * result + id;
        result = prime * result + ((name == null) ? 0 : name.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        User other = (User) obj;
        if (classid != other.classid)
            return false;
        if (id != other.id)
            return false;
        if (name == null) {
            if (other.name != null)
                return false;
        } else if (!name.equals(other.name))
            return false;
        return true;
    }
}
5.定义map
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
* @author DELL_pc
* @date 2017年3月15日
*/
/**
 * Mapper that parses student records and keys them by class id.
 *
 * <p>Each input line may contain several records separated by tabs;
 * each record is a whitespace-separated triple: classid, name, id.
 * Emits (classid, User) so the shuffle groups students by class.
 */
public class MyMapper extends Mapper<LongWritable, Text, IntWritable, User> {
    // Reused output key — avoids allocating one IntWritable per record.
    IntWritable cla = new IntWritable();

    /**
     * @param key     byte offset of the line (unused)
     * @param value   one input line holding tab-separated record groups
     * @param context emits (IntWritable classid, User record)
     * @throws NumberFormatException if classid/id fields are not integers
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, User>.Context context)
            throws IOException, InterruptedException {
        // Outer pass: split the line into tab-separated record groups.
        StringTokenizer groups = new StringTokenizer(value.toString(), "\t");
        while (groups.hasMoreTokens()) {
            // Inner pass: split one group on whitespace into field triples.
            StringTokenizer fields = new StringTokenizer(groups.nextToken());
            while (fields.hasMoreTokens()) {
                // parseInt returns a primitive directly; Integer.valueOf
                // would box an Integer only to unbox it immediately.
                int classid = Integer.parseInt(fields.nextToken());
                String name = fields.nextToken();
                int id = Integer.parseInt(fields.nextToken());
                cla.set(classid);
                context.write(cla, new User(classid, name, id));
            }
        }
    }
}
6.定义reducer
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
* @author DELL_pc
* @date 2017年3月15日
*/
/**
 * Reducer that concatenates all students of one class onto a single line.
 *
 * <p>Output value format per student: "学号:&lt;id&gt;姓名&lt;name&gt;\t".
 * (Class name keeps the original spelling "MyRedecer" because the driver
 * references it.)
 */
public class MyRedecer extends Reducer<IntWritable, User, IntWritable, Text> {
    // Reused output value — one Text instance for the reducer's lifetime.
    Text text = new Text();

    /**
     * @param key     the class id this group belongs to
     * @param user    all User records sharing that class id
     * @param context emits (class id, concatenated student descriptions)
     */
    @Override
    protected void reduce(IntWritable key, Iterable<User> user,
            Reducer<IntWritable, User, IntWritable, Text>.Context context) throws IOException, InterruptedException {
        // StringBuilder: this method is single-threaded, so the
        // synchronized StringBuffer buys nothing.
        StringBuilder sb = new StringBuilder();
        for (User u : user) {
            sb.append("学号:" + u.getId() + "姓名" + u.getName() + "\t");
        }
        text.set(sb.toString());
        // The reduce key already IS the class id — no need to track it
        // through the loop and copy it into a fresh IntWritable.
        context.write(key, text);
    }
}
7.定义主函数
/**
* @author DELL_pc
* @date 2017年3月15日
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver for the class-grouping job: reads student triples from HDFS,
 * groups them by class id, and writes one line per class.
 *
 * <p>Input and output paths are fixed to /user/lei/wordcount/ and
 * /user/lei/wordcount/out; command-line args are currently ignored.
 */
public class UserDemo implements Tool {
    private Configuration conf = null;

    /** @return the configuration supplied by ToolRunner. */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /** Stores the configuration and points it at the remote namenode. */
    @Override
    public void setConf(Configuration that) {
        this.conf = that;
        this.conf.set("fs.defaultFS", "hdfs://192.168.59.130:8020");
    }

    /**
     * Configures and submits the job, blocking until completion.
     *
     * @param arg0 command-line arguments (unused; paths are hard-coded)
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] arg0) throws Exception {
        Job job = Job.getInstance(this.conf, "usercount");
        // Fix: without setJarByClass the submitted job cannot locate
        // MyMapper/MyRedecer on remote task nodes (ClassNotFoundException).
        job.setJarByClass(UserDemo.class);
        // Input
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.addInputPath(job, new Path("/user/lei/wordcount/"));
        // Map phase
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(User.class);
        // Reduce phase
        job.setReducerClass(MyRedecer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path("/user/lei/wordcount/out"));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        long startdate = System.currentTimeMillis();
        // Fix: keep ToolRunner's status instead of discarding it, so the
        // process exit code reflects job success/failure for scripts.
        int status = ToolRunner.run(new UserDemo(), args);
        long costdate = System.currentTimeMillis() - startdate;
        System.out.println("花费时间" + costdate + "ms");
        System.exit(status);
    }
}
8.计数器结果
INFO - Counters: 38
File System Counters
FILE: Number of bytes read=656
FILE: Number of bytes written=470578
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=162
HDFS: Number of bytes written=144
HDFS: Number of read operations=15
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Map-Reduce Framework
Map input records=3
Map output records=6
Map output bytes=120
Map output materialized bytes=138
Input split bytes=116
Combine input records=0
Combine output records=0
Reduce input groups=4
Reduce shuffle bytes=138
Reduce input records=6
Reduce output records=4
Spilled Records=12
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=45
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=473956352
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=81
File Output Format Counters
Bytes Written=144