咱们Hadoop篇正文内容就这么多了,接下来的三篇是三个MapReduce的案例,以代码为主,实现过程相对简单。关注专栏《破茧成蝶——大数据篇》查看相关系列的文章~
目录
一、需求分析
现有如下四列的日志数据:
分别代表手机号码、上行流量、下行流量、总流量。现在需要对此数据进行加工,输出流量使用量前5的用户信息。
二、代码实现
2.1 首先定义一个Bean类,值得注意的是这里面有一个compareTo的比较方法,实现降序排序。
package com.xzw.hadoop.mapreduce.topn;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Hadoop key type holding one record's traffic figures: upstream flow,
 * downstream flow and their total.
 *
 * <p>The natural ordering ({@link #compareTo}) is by total flow, largest
 * first. {@link #equals} and {@link #hashCode} look at all three fields, so
 * two beans with the same total but different up/down values compare as 0
 * yet are not equal.
 *
 * @author: xzw
 * @create_date: 2020/12/7 10:40
 */
public class TopNBean implements WritableComparable<TopNBean> {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    /** No-argument constructor; all three fields start at 0. */
    public TopNBean() {
    }

    /**
     * Creates a bean from the two directional figures and derives the total,
     * exactly as {@link #set(long, long)} does.
     *
     * @param upFlow   upstream flow
     * @param downFlow downstream flow
     */
    public TopNBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        // Previously left at 0, which made constructor-built beans sort as if
        // they had no traffic at all.
        this.sumFlow = upFlow + downFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    /** Sets the upstream flow only; the stored total is not recomputed. */
    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    /** Sets the downstream flow only; the stored total is not recomputed. */
    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    /**
     * Overwrites both directional figures and recomputes the total.
     *
     * @param upFlow   upstream flow
     * @param downFlow downstream flow
     */
    public void set(long upFlow, long downFlow) {
        this.downFlow = downFlow;
        this.upFlow = upFlow;
        this.sumFlow = downFlow + upFlow;
    }

    /** @return the three figures separated by tabs: up, down, total */
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    /**
     * Orders beans by total flow in descending order.
     *
     * @param o the bean to compare against
     * @return negative when this bean has the larger total, positive when it
     *         has the smaller one, 0 when the totals match
     */
    @Override
    public int compareTo(TopNBean o) {
        // Arguments are swapped on purpose: larger totals sort first.
        return Long.compare(o.sumFlow, this.sumFlow);
    }

    /** Two beans are equal when all three figures match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TopNBean)) {
            return false;
        }
        TopNBean other = (TopNBean) obj;
        return upFlow == other.upFlow
                && downFlow == other.downFlow
                && sumFlow == other.sumFlow;
    }

    /**
     * Value-based hash derived only from the three figures, so it does not
     * depend on object identity.
     */
    @Override
    public int hashCode() {
        int result = (int) (upFlow ^ (upFlow >>> 32));
        result = 31 * result + (int) (downFlow ^ (downFlow >>> 32));
        result = 31 * result + (int) (sumFlow ^ (sumFlow >>> 32));
        return result;
    }

    /** Writes up, down and total as three longs, in that order. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(upFlow);
        dataOutput.writeLong(downFlow);
        dataOutput.writeLong(sumFlow);
    }

    /** Reads the three longs back in the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        upFlow = dataInput.readLong();
        downFlow = dataInput.readLong();
        sumFlow = dataInput.readLong();
    }
}
2.2 定义Mapper类
package com.xzw.hadoop.mapreduce.topn;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Turns each tab-separated input line into a ({@link TopNBean}, phone number)
 * pair.
 *
 * <p>Column 0 is emitted as the value; columns 1 and 2 are parsed as the
 * upstream and downstream figures and passed to {@code TopNBean.set}. Any
 * further columns are not read here. Lines with fewer than three columns or
 * with non-numeric figures are skipped and counted instead of failing the
 * task.
 *
 * @author: xzw
 * @create_date: 2020/12/7 10:57
 */
public class TopNMapper extends Mapper<LongWritable, Text, TopNBean, Text> {
    /** Counter group and name under which skipped records are reported. */
    private static final String COUNTER_GROUP = "TopN";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /** Phone number plus the two flow columns. */
    private static final int MIN_FIELDS = 3;

    // Output objects are reused across map() calls.
    private final TopNBean k = new TopNBean();
    private final Text v = new Text();

    /**
     * Parses one line and emits its traffic bean keyed ahead of the phone
     * number.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one line of input, tab-separated
     * @param context sink for the output pair and for the skip counter
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length < MIN_FIELDS) {
            // Blank or truncated line: indexing fields[1]/fields[2] would throw.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        long upFlow;
        long downFlow;
        try {
            upFlow = Long.parseLong(fields[1]);
            downFlow = Long.parseLong(fields[2]);
        } catch (NumberFormatException ignored) {
            // Non-numeric figure: record it and move on rather than abort the job.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        v.set(fields[0]);
        k.set(upFlow, downFlow);
        context.write(k, v);
    }
}
2.3 定义让所有数据分到同一组的比较器
package com.xzw.hadoop.mapreduce.topn;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Comparator that treats every pair of keys as equal, so that all records
 * fall into a single group when it is used as the grouping comparator.
 *
 * @author: xzw
 * @create_date: 2020/12/7 11:16
 */
public class TopNComparator extends WritableComparator {
    /** Comparison result returned for every pair of keys. */
    private static final int SAME_GROUP = 0;

    /**
     * Registers {@link TopNBean} as the key class with the base comparator.
     * NOTE(review): the {@code true} flag is expected to make the base class
     * create key instances for deserialization; confirm against the
     * WritableComparator API.
     */
    protected TopNComparator() {
        super(TopNBean.class, true);
    }

    /**
     * Ignores both arguments and always answers "equal".
     *
     * @param a first key (unused)
     * @param b second key (unused)
     * @return always 0
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return SAME_GROUP;
    }
}
2.4 定义Reducer类
package com.xzw.hadoop.mapreduce.topn;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
 * Emits the first N (phone number, {@link TopNBean}) pairs of each reduce
 * group and discards the rest.
 *
 * <p>N defaults to {@value #DEFAULT_LIMIT} and can be overridden through the
 * job configuration property {@value #LIMIT_KEY}.
 *
 * @author: xzw
 * @create_date: 2020/12/7 11:04
 */
public class TopNReducer extends Reducer<TopNBean, Text, Text, TopNBean> {
    /** Configuration property that overrides how many records are emitted. */
    public static final String LIMIT_KEY = "topn.limit";

    /** Number of records emitted when {@link #LIMIT_KEY} is not set. */
    public static final int DEFAULT_LIMIT = 5;

    private int limit = DEFAULT_LIMIT;

    /** Reads the record limit from the job configuration once per task. */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        limit = context.getConfiguration().getInt(LIMIT_KEY, DEFAULT_LIMIT);
    }

    /**
     * Writes at most {@code limit} values of the group, each paired with the
     * key.
     *
     * @param key     the group's key object
     * @param values  phone numbers belonging to the group
     * @param context sink for the output pairs
     */
    @Override
    protected void reduce(TopNBean key, Iterable<Text> values, Context context) throws IOException,
            InterruptedException {
        Iterator<Text> iterator = values.iterator();
        // Stop as soon as either the limit is hit or the group is exhausted.
        for (int emitted = 0; emitted < limit && iterator.hasNext(); emitted++) {
            // next() is called before the write on purpose.
            // NOTE(review): this relies on the framework refreshing `key` to match
            // the value just returned when keys are grouped together; confirm.
            Text phone = iterator.next();
            context.write(phone, key);
        }
    }
}
2.5 定义Driver类
package com.xzw.hadoop.mapreduce.topn;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Entry point that configures and submits the Top-N job.
 *
 * <p>Usage: {@code TopNDriver <input path> <output path>}. When fewer than
 * two arguments are given, the built-in local paths are used instead.
 *
 * @author: xzw
 * @create_date: 2020/12/7 11:21
 */
public class TopNDriver {
    /** Fallback paths used when no paths are passed on the command line. */
    private static final String DEFAULT_INPUT = "e:/input/topn.txt";
    private static final String DEFAULT_OUTPUT = "e:/output1";

    /**
     * Builds the job, waits for it to finish and exits with 0 on success or 1
     * on failure.
     *
     * @param args optional {@code [input path, output path]}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fall back to the defaults only when the caller supplied no paths;
        // previously the arguments were always overwritten.
        if (args == null || args.length < 2) {
            args = new String[]{DEFAULT_INPUT, DEFAULT_OUTPUT};
        }

        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(TopNDriver.class);

        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);

        job.setMapOutputKeyClass(TopNBean.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopNBean.class);

        // Every key compares equal for grouping, so each reducer sees one group.
        job.setGroupingComparatorClass(TopNComparator.class);
        // A global Top-N is only correct when a single reducer sees all records.
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
2.6 测试结果