自定义组件Partitioner- 根据不同省份统计手机流量
默认情况下,MapReduce会将map输出的kv对按照相同的key分组,然后分发给reducetask。默认的分发规则(HashPartitioner)是:(key.hashCode() & Integer.MAX_VALUE) % reducetask数。
需求
原始数据不变,依然是[原始数据地址]。如果让根据不同省份分别输出到不同的文件中,该怎么办?
思路
既然MapReduce默认是按照key的hashCode对reducetask数取模的规则分发数据的,那么只要自定义一个Partitioner来替换这个规则,让同一个省的数据分发到同一个reducetask,就能实现需求。
实现
JavaBean
/**
 * Hadoop {@code Writable} value object carrying the traffic figures of one
 * phone number: upstream, downstream and their sum.
 *
 * <p>Serialized layout, in order: a boolean telling whether {@code phone} is
 * present, the phone number (only when present, via {@code writeUTF}), then
 * {@code upStream}, {@code downStream} and {@code total} as ints.
 *
 * <p>NOTE(review): the phone number is now part of the serialized form, so
 * data written with the earlier three-int layout cannot be read back with
 * this class.
 */
public class Traffic implements Writable {
    private String phone;
    private int upStream;
    private int downStream;
    private int total;

    /** No-argument constructor so the object can be created before {@link #readFields} fills it. */
    public Traffic() {
    }

    /**
     * Sets all fields at once; {@code total} is derived as
     * {@code upStream + downStream}.
     *
     * @param phone      phone number this record belongs to
     * @param upStream   upstream traffic
     * @param downStream downstream traffic
     */
    public void setTraffic(String phone, int upStream, int downStream) {
        this.phone = phone;
        this.upStream = upStream;
        this.downStream = downStream;
        this.total = upStream + downStream;
    }

    public String getPhone() {
        return phone;
    }

    public void setPhone(String phone) {
        this.phone = phone;
    }

    public int getUpStream() {
        return upStream;
    }

    public void setUpStream(int upStream) {
        this.upStream = upStream;
    }

    public int getDownStream() {
        return downStream;
    }

    public void setDownStream(int downStream) {
        this.downStream = downStream;
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    /**
     * Restores every field from {@code in}, in exactly the order used by
     * {@link #write}.
     *
     * @param in source of the serialized fields
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        // Always overwrite phone so a reused instance never keeps a stale value.
        phone = in.readBoolean() ? in.readUTF() : null;
        upStream = in.readInt();
        downStream = in.readInt();
        total = in.readInt();
    }

    /**
     * Writes every field to {@code out}, including the phone number.
     *
     * @param out sink for the serialized fields
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeUTF rejects null, so a presence flag precedes the phone number.
        boolean hasPhone = phone != null;
        out.writeBoolean(hasPhone);
        if (hasPhone) {
            out.writeUTF(phone);
        }
        out.writeInt(upStream);
        out.writeInt(downStream);
        out.writeInt(total);
    }

    @Override
    public String toString() {
        return "[phone=" + phone + ", upStream=" + upStream + ", downStream=" + downStream + ", total=" + total + "]";
    }
}
自定义Partitioner
/**
 * Partitioner that routes each record by the first three characters of its
 * key (the phone number), so that all numbers sharing a known prefix end up
 * in the same partition.
 *
 * <p>Known prefixes map to partitions 0-3; everything else goes to partition
 * {@value #DEFAULT_PARTITION}. The returned index does not depend on the
 * number of partitions passed in.
 * NOTE(review): the job therefore presumably needs at least five reduce
 * tasks; the driver is not visible here, so confirm its configuration.
 */
public class SelfPartitioner extends Partitioner<Text, Traffic> {
    /** Number of leading characters of the key used for the lookup. */
    private static final int PREFIX_LENGTH = 3;

    /** Partition used for keys whose prefix is unknown or too short. */
    private static final int DEFAULT_PARTITION = 4;

    // A typed java.util.HashMap replaces the raw third-party HashedMap; it is
    // fully qualified so no additional import is required.
    private static final Map<String, Integer> provinces = new java.util.HashMap<String, Integer>();

    // Mock prefix-to-partition table.
    static {
        provinces.put("136", 0);
        provinces.put("137", 1);
        provinces.put("138", 2);
        provinces.put("139", 3);
    }

    /**
     * Picks the partition for a record.
     *
     * @param key           phone number used as the map output key
     * @param value         traffic record (not consulted)
     * @param numPartitions number of partitions of the job (not consulted)
     * @return the partition registered for the key's three-character prefix,
     *         or {@value #DEFAULT_PARTITION} when the prefix is unknown or the
     *         key has fewer than three characters
     */
    @Override
    public int getPartition(Text key, Traffic value, int numPartitions) {
        String phone = key.toString();
        // Guard against short keys: substring(0, 3) would throw on them.
        if (phone.length() < PREFIX_LENGTH) {
            return DEFAULT_PARTITION;
        }
        Integer partition = provinces.get(phone.substring(0, PREFIX_LENGTH));
        return partition == null ? DEFAULT_PARTITION : partition;
    }
}
Mapper
/**
 * Mapper that turns one tab-separated input line into a
 * (phone number, {@link Traffic}) pair.
 *
 * <p>A line is used only when it has exactly {@value #EXPECTED_FIELDS}
 * fields; field 1 is taken as the phone number, field 8 as the upstream
 * traffic and field 9 as the downstream traffic. Lines with a different field
 * count, or whose traffic fields are not valid integers, are skipped.
 */
public class TrafficSumMapper extends Mapper<LongWritable, Text, Text, Traffic> {
    /** Number of tab-separated fields a well-formed line has. */
    private static final int EXPECTED_FIELDS = 11;

    // Output objects are reused across map() calls instead of being re-allocated per record.
    final Traffic traffic = new Traffic();
    final Text k = new Text();

    /**
     * Parses one input line and emits its traffic record keyed by phone number.
     *
     * @param key     input key supplied by the framework (not consulted)
     * @param value   one line of input text
     * @param context sink for the emitted pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length != EXPECTED_FIELDS) {
            return;
        }

        int upStream;
        int downStream;
        try {
            upStream = Integer.parseInt(fields[8]);
            downStream = Integer.parseInt(fields[9]);
        } catch (NumberFormatException ignored) {
            // A non-numeric traffic field marks the record as malformed; skip it
            // like a line with the wrong field count rather than failing the task.
            return;
        }

        String phone = fields[1];
        traffic.setTraffic(phone, upStream, downStream);
        k.set(phone);
        context.write(k, traffic);
    }
}
Reducer
/**
 * Reducer that adds up all {@link Traffic} records of one phone number and
 * emits a single summary record for it.
 */
public class TrafficSumReduce extends Reducer<Text, Traffic, Text, Traffic> {
    /**
     * Sums the upstream and downstream traffic of every value for {@code key}
     * and writes one combined record.
     *
     * @param key     phone number
     * @param values  traffic records grouped under that phone number
     * @param context sink for the summary record
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<Traffic> values, Context context) throws IOException, InterruptedException {
        // NOTE(review): the sums are plain ints and wrap silently if they exceed
        // Integer.MAX_VALUE; confirm the expected data volume.
        int totalUpStream = 0;
        int totalDownStream = 0;
        for (Traffic traffic : values) {
            totalUpStream += traffic.getUpStream();
            totalDownStream += traffic.getDownStream();
        }

        // setTraffic derives the total from the two sums, so no separate
        // running total is kept.
        Traffic summary = new Traffic();
        summary.setTraffic(key.toString(), totalUpStream, totalDownStream);
        context.write(key, summary);
    }
}
结果
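生成了五个文件(part-r-00000 ~ part-r-00004),每个文件存放同一分区(省份)的手机号及其流量统计。注意:Driver中需要通过job.setPartitionerClass(SelfPartitioner.class)指定自定义的Partitioner,并通过job.setNumReduceTasks(5)将reducetask数设置为5,与分区数保持一致。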