CC00048.hadoop——|Hadoop&MapReduce.V21|——|Hadoop.v21|MapReduce综合案例.v02|_fields[1], integer.parseint(fields[2])-CSDN博客

一、分区排序多目录输出

### --- Mapper

package com.yanqi.mr.comment.step2;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map stage of the comment job: splits one merged file into lines and emits a
 * {@link CommentBean} key for every well-formed line.
 *
 * <p>The input is read through a SequenceFile input format, so the key is a
 * {@code Text} and the value a {@code BytesWritable}. According to the original
 * author's notes, the key is the source file name and the value is the complete
 * content of that file.
 */
public class CommentMapper extends Mapper<Text, BytesWritable, CommentBean, NullWritable> {
    /** Minimum number of tab-separated columns a line needs to form a bean. */
    private static final int MIN_FIELD_COUNT = 9;

    /**
     * Decodes the file content, splits it on {@code '\n'} and writes one bean per
     * parsable line. Lines that cannot be parsed are skipped.
     *
     * @param key     input key (not used by this method)
     * @param value   raw bytes holding the text to split into lines
     * @param context task context used to emit the beans
     * @throws IOException          if emitting a record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void map(Text key, BytesWritable value, Context context) throws IOException, InterruptedException {
        // getBytes() exposes the backing buffer, which can be longer than the payload;
        // only the first getLength() bytes are valid. Decode as UTF-8 explicitly so the
        // result does not depend on the platform default charset of the task node.
        String content = new String(value.getBytes(), 0, value.getLength(),
                java.nio.charset.StandardCharsets.UTF_8);
        for (String line : content.split("\n")) {
            CommentBean commentBean = parseStrToCommentBean(line);
            if (commentBean != null) {
                context.write(commentBean, NullWritable.get());
            }
        }
    }

    /**
     * Splits a tab-separated line and wraps its first nine columns in a bean.
     *
     * @param line one line of input text, may be null or blank
     * @return the populated bean, or {@code null} when the line is blank, has fewer
     *         than nine columns, or has a non-numeric value in column 3 or 7
     */
    public CommentBean parseStrToCommentBean(String line) {
        if (StringUtils.isBlank(line)) {
            return null;
        }
        String[] fields = line.split("\t");
        if (fields.length < MIN_FIELD_COUNT) {
            return null;
        }
        try {
            return new CommentBean(fields[0], fields[1], fields[2], Integer.parseInt(fields[3]),
                    fields[4], fields[5], fields[6], Integer.parseInt(fields[7]), fields[8]);
        } catch (NumberFormatException ignored) {
            // A non-numeric goodsNum/commentStatus is treated like any other malformed
            // line (skipped) instead of failing the whole task.
            return null;
        }
    }
}

### --- CommentBean

package com.yanqi.mr.comment.step2;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * One comment record, usable as a MapReduce key.
 *
 * <p>Instances order themselves by {@code commentTime} in descending order and are
 * serialized field by field in declaration order.
 */
public class CommentBean implements WritableComparable<CommentBean> {
    private String orderId;
    private String comment;
    private String commentExt;
    private int goodsNum;
    private String phoneNum;
    private String userName;
    private String address;
    private int commentStatus;
    private String commentTime;

    /** No-argument constructor; all fields keep their default values. */
    public CommentBean() {
    }

    /** Creates a bean with every field set from the given arguments. */
    public CommentBean(String orderId, String comment, String commentExt, int goodsNum, String phoneNum, String userName, String address, int commentStatus, String commentTime) {
        this.orderId = orderId;
        this.comment = comment;
        this.commentExt = commentExt;
        this.goodsNum = goodsNum;
        this.phoneNum = phoneNum;
        this.userName = userName;
        this.address = address;
        this.commentStatus = commentStatus;
        this.commentTime = commentTime;
    }

    // ---- serialization -------------------------------------------------------

    /** Writes all nine fields to {@code out}; {@link #readFields} reads them back in the same order. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(orderId);
        out.writeUTF(comment);
        out.writeUTF(commentExt);
        out.writeInt(goodsNum);
        out.writeUTF(phoneNum);
        out.writeUTF(userName);
        out.writeUTF(address);
        out.writeInt(commentStatus);
        out.writeUTF(commentTime);
    }

    /** Restores all nine fields from {@code in}, in the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        orderId = in.readUTF();
        comment = in.readUTF();
        commentExt = in.readUTF();
        goodsNum = in.readInt();
        phoneNum = in.readUTF();
        userName = in.readUTF();
        address = in.readUTF();
        commentStatus = in.readInt();
        commentTime = in.readUTF();
    }

    // ---- ordering and text form ----------------------------------------------

    /**
     * Sort rule: descending by comment time, compared as strings.
     *
     * @param o the bean to compare against
     * @return a negative, zero or positive value when this bean sorts before, equal
     *         to or after {@code o}
     */
    @Override
    public int compareTo(CommentBean o) {
        String otherTime = o.getCommentTime();
        // The argument's time is the receiver of compareTo, which reverses the natural order.
        return otherTime.compareTo(this.commentTime);
    }

    /** Returns the nine fields joined by tab characters, in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(orderId).append("\t");
        text.append(comment).append("\t");
        text.append(commentExt).append("\t");
        text.append(goodsNum).append("\t");
        text.append(phoneNum).append("\t");
        text.append(userName).append("\t");
        text.append(address).append("\t");
        text.append(commentStatus).append("\t");
        text.append(commentTime);
        return text.toString();
    }

    // ---- accessors -----------------------------------------------------------

    public String getOrderId() {
        return this.orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getComment() {
        return this.comment;
    }

    public void setComment(String comment) {
        this.comment = comment;
    }

    public String getCommentExt() {
        return this.commentExt;
    }

    public void setCommentExt(String commentExt) {
        this.commentExt = commentExt;
    }

    public int getGoodsNum() {
        return this.goodsNum;
    }

    public void setGoodsNum(int goodsNum) {
        this.goodsNum = goodsNum;
    }

    public String getPhoneNum() {
        return this.phoneNum;
    }

    public void setPhoneNum(String phoneNum) {
        this.phoneNum = phoneNum;
    }

    public String getUserName() {
        return this.userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public int getCommentStatus() {
        return this.commentStatus;
    }

    public void setCommentStatus(int commentStatus) {
        this.commentStatus = commentStatus;
    }

    public String getCommentTime() {
        return this.commentTime;
    }

    public void setCommentTime(String commentTime) {
        this.commentTime = commentTime;
    }
}

### --- 自定义分区器

package com.yanqi.mr.comment.step2;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Routes each comment to a partition chosen from its comment status.
 *
 * <p>Statuses 0, 1 and 2 correspond directly to partitions 0, 1 and 2. Any other
 * status is sent to bucket 2, matching the record writer in this package, which
 * treats every status other than 0 and 1 as the third category.
 */
public class CommentPartitioner extends Partitioner<CommentBean, NullWritable> {
    /** Bucket used for every status other than 0 and 1. */
    private static final int FALLBACK_BUCKET = 2;

    /**
     * @param commentBean   the map output key whose status selects the partition
     * @param nullWritable  the map output value (unused)
     * @param numPartitions number of reduce tasks configured for the job
     * @return a partition index that is always within {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(CommentBean commentBean, NullWritable nullWritable, int numPartitions) {
        int status = commentBean.getCommentStatus();
        int bucket = (status == 0 || status == 1) ? status : FALLBACK_BUCKET;
        // Returning an index outside [0, numPartitions) makes the framework fail the
        // task, so fold the bucket into the valid range when fewer reducers are configured.
        return bucket % numPartitions;
    }
}

### --- 自定义OutputFormat
### --- CommentOutputFormat

package com.yanqi.mr.comment.step2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Output format that writes the final {@code CommentBean}/{@code NullWritable} pairs
 * into a per-category file below the job output directory.
 *
 * <p>The file opened by a task is chosen from the task's id: 0 opens
 * {@code good/good.log}, 1 opens {@code common/common.log}, anything else opens
 * {@code bad/bad.log}. Per the original notes, 0 means positive, 1 neutral and
 * 2 negative comments.
 */
public class CommentOutputFormat extends FileOutputFormat<CommentBean, NullWritable> {
    /**
     * Creates the writer responsible for emitting this task's records.
     *
     * @param job the task attempt context supplying configuration and task id
     * @return a writer holding exactly one open stream (the other two are null)
     * @throws IOException if no output directory is configured or the file cannot be created
     */
    @Override
    public RecordWriter<CommentBean, NullWritable> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
        Configuration conf = job.getConfiguration();
        // Output directory configured in the driver.
        Path outputDir = getOutputPath(job);
        if (outputDir == null) {
            throw new IOException("Output directory is not set for the job");
        }
        // Resolve the file system from the output path itself so a non-default
        // scheme on the output directory is honoured.
        FileSystem fs = outputDir.getFileSystem(conf);

        FSDataOutputStream goodOut = null;
        FSDataOutputStream commonOut = null;
        FSDataOutputStream badOut = null;
        // Id of the current reduce task, i.e. the partition number it processes.
        int id = job.getTaskAttemptID().getTaskID().getId();
        // Child paths use '/' so they resolve as sub-directories on every platform;
        // a literal backslash is only a separator on Windows.
        if (id == 0) {
            goodOut = fs.create(new Path(outputDir, "good/good.log"));
        } else if (id == 1) {
            commonOut = fs.create(new Path(outputDir, "common/common.log"));
        } else {
            badOut = fs.create(new Path(outputDir, "bad/bad.log"));
        }

        return new CommentRecorderWrtier(goodOut, commonOut, badOut);
    }
}

### --- RecordWriter

package com.yanqi.mr.comment.step2;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.IOException;

/**
 * Record writer that appends each comment, as one text line, to the stream that
 * matches its comment status: 0 goes to the "good" stream, 1 to the "common"
 * stream, and every other status to the "bad" stream.
 *
 * <p>Any of the three streams may be null; a null stream simply cannot receive records.
 */
public class CommentRecorderWrtier extends RecordWriter<CommentBean, NullWritable> {
    // Streams the records are written to; each may be null.
    private final FSDataOutputStream goodOut;
    private final FSDataOutputStream commonOut;
    private final FSDataOutputStream badOut;

    /**
     * @param goodOut   stream for status 0, or null
     * @param commonOut stream for status 1, or null
     * @param badOut    stream for every other status, or null
     */
    public CommentRecorderWrtier(FSDataOutputStream goodOut, FSDataOutputStream commonOut, FSDataOutputStream badOut) {
        this.goodOut = goodOut;
        this.commonOut = commonOut;
        this.badOut = badOut;
    }

    /**
     * Writes {@code key.toString()} followed by a newline to the stream selected by
     * the key's comment status, then flushes that stream.
     *
     * @param key   the comment to write
     * @param value ignored
     * @throws IOException if the selected stream is not open or the write fails
     */
    @Override
    public void write(CommentBean key, NullWritable value) throws IOException, InterruptedException {
        int commentStatus = key.getCommentStatus();
        FSDataOutputStream target;
        String category;
        if (commentStatus == 0) {
            target = goodOut;
            category = "good";
        } else if (commentStatus == 1) {
            target = commonOut;
            category = "common";
        } else {
            target = badOut;
            category = "bad";
        }
        if (target == null) {
            // Report a routing mismatch explicitly instead of failing with a bare NullPointerException.
            throw new IOException("No '" + category + "' output stream is open for comment status "
                    + commentStatus);
        }
        // Encode explicitly as UTF-8 so the bytes do not depend on the platform default charset.
        target.write(key.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        target.write('\n');
        target.flush();
    }

    /**
     * Closes every open stream. All streams are attempted even if one fails; the
     * first failure is rethrown (later ones attached as suppressed) so that a failed
     * close is not silently ignored.
     *
     * @param context the task attempt context (unused)
     * @throws IOException if closing any stream fails
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        IOException failure = null;
        FSDataOutputStream[] streams = {goodOut, commonOut, badOut};
        for (FSDataOutputStream stream : streams) {
            if (stream == null) {
                continue;
            }
            try {
                stream.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}

### --- Reducer

package com.yanqi.mr.comment.step2;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Pass-through reducer: emits the key once for every value grouped under it.
 */
public class CommentReducer extends Reducer<CommentBean, NullWritable, CommentBean, NullWritable> {
    /**
     * Walks the grouped values and writes the key alongside each one.
     *
     * <p>Per the original author's note, {@code key} is a reference whose contents
     * change as the values are fetched, which is why the key is written once per
     * value rather than once per group.
     *
     * @param key     the grouped key
     * @param values  the values grouped under the key
     * @param context task context used to emit the records
     */
    @Override
    protected void reduce(CommentBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        java.util.Iterator<NullWritable> cursor = values.iterator();
        while (cursor.hasNext()) {
            NullWritable current = cursor.next();
            context.write(key, current);
        }
    }
}

### --- Driver

package com.yanqi.mr.comment.step2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver that configures and submits the comment partition/sort/multi-directory
 * output job.
 */
public class CommentDriver {
    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "E:\\merge\\outout";
    /** Output path used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "E:\\merge\\outmulti-out";
    /** One reduce task per comment category. */
    private static final int REDUCE_TASKS = 3;

    /**
     * Submits the job and exits with status 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; missing values fall back to the built-in defaults
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "CommentDriver");
        job.setJarByClass(CommentDriver.class);

        job.setMapperClass(CommentMapper.class);
        job.setReducerClass(CommentReducer.class);

        job.setMapOutputKeyClass(CommentBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(CommentBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(CommentPartitioner.class);
        // Input format
        job.setInputFormatClass(SequenceFileInputFormat.class);
        // Output format
        job.setOutputFormatClass(CommentOutputFormat.class);
        // Input and output paths
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Number of reduce tasks
        job.setNumReduceTasks(REDUCE_TASKS);

        boolean succeeded = job.waitForCompletion(true);
        // Signal failure with a non-zero status so calling scripts can detect it.
        System.exit(succeeded ? 0 : 1);
    }
}

二、编译打印

### --- 编译打印

~~~     输入输出参数
~~~     打印输出