ProviceCountMapper.java
package os.bigdata.provincflowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ProviceCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    // Reuse a single Text/FlowBean instance per mapper to avoid allocating objects for every record.
    private Text k = new Text();
    private FlowBean bean = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = StringUtils.split(line, '\t');
        String phone = fields[1];
        // Index from the end of the line so a variable number of middle fields does not break parsing.
        long upflow = Long.parseLong(fields[fields.length - 3]);
        long downflow = Long.parseLong(fields[fields.length - 2]);
        k.set(phone);
        bean.set(upflow, downflow);
        context.write(k, bean);
    }
}
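The mapper expects tab-separated log lines with the phone number in the second field and the upload/download byte counts in the third- and second-to-last fields. A hypothetical line in that shape (all values invented for illustration):

1363157985066	13726230503	120.196.100.82	i02.c.aliimg.com	24	27	2481	24681	200

Here fields[1] is 13726230503, fields[fields.length - 3] is 2481 (upflow), and fields[fields.length - 2] is 24681 (downflow).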
ProviceCountReducer.java
package os.bigdata.provincflowcount;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ProviceCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    private FlowBean bean = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        long upflowSum = 0;
        long downflowSum = 0;
        // Accumulate the traffic of every record that shares this phone number.
        for (FlowBean b : values) {
            upflowSum += b.getUpflow();
            downflowSum += b.getDownflow();
        }
        bean.set(upflowSum, downflowSum);
        context.write(key, bean);
    }
}
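For example, if key 13726230503 arrives with values (2481, 24681) and (100, 200), the reducer emits 13726230503 with FlowBean [upflow=2581, downflow=24881, sumflow=27462] (numbers invented for illustration).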
ProvicePartitioner.java
package os.bigdata.provincflowcount;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class ProvicePartitioner extends Partitioner<Text, FlowBean> {
    // Maps a phone-number prefix to a partition number (and therefore to an output file).
    private static HashMap<String, Integer> provinceMap = new HashMap<String, Integer>();
    static {
        provinceMap.put("136", 0);
        provinceMap.put("137", 1);
        provinceMap.put("138", 2);
        provinceMap.put("139", 3);
    }

    @Override
    public int getPartition(Text key, FlowBean value, int numPartitions) {
        // The first three digits of the phone number identify the province.
        String prefix = key.toString().substring(0, 3);
        Integer provinceNum = provinceMap.get(prefix);
        if (provinceNum == null) {
            // Unknown prefixes all fall through to the last partition.
            provinceNum = 4;
        }
        return provinceNum;
    }
}
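Note that this partitioner produces five partitions (0 through 4), so the job must run five reduce tasks. For a quick local sanity check of the prefix mapping, a throwaway driver like the following can be used (a sketch; the class name is ours, and null is passed for the value because getPartition ignores it):

PartitionerCheck.java
package os.bigdata.provincflowcount;
import org.apache.hadoop.io.Text;
public class PartitionerCheck {
    public static void main(String[] args) {
        ProvicePartitioner p = new ProvicePartitioner();
        // 136/137/138/139 prefixes land in partitions 0-3; anything else falls through to 4.
        for (String phone : new String[] { "13602441234", "13912345678", "15012345678" }) {
            System.out.println(phone + " -> partition " + p.getPartition(new Text(phone), null, 5));
        }
    }
}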
FlowBean.java
package os.bigdata.provincflowcount;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class FlowBean implements Writable {
    private long upflow;
    private long downflow;
    private long sumflow;

    // Hadoop instantiates Writables via reflection, so a no-arg constructor is required.
    public FlowBean() {
    }

    public FlowBean(long upflow, long downflow) {
        set(upflow, downflow);
    }

    public void set(long upflow, long downflow) {
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = upflow + downflow;
    }

    public long getUpflow() {
        return upflow;
    }

    public void setUpflow(long upflow) {
        this.upflow = upflow;
    }

    public long getDownflow() {
        return downflow;
    }

    public void setDownflow(long downflow) {
        this.downflow = downflow;
    }

    public long getSumflow() {
        return sumflow;
    }

    public void setSumflow(long sumflow) {
        this.sumflow = sumflow;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // sumflow is derived from the other two fields, so only they are serialized.
        out.writeLong(upflow);
        out.writeLong(downflow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Fields must be read in the exact order they were written.
        upflow = in.readLong();
        downflow = in.readLong();
        // Recompute the derived total so sumflow is never stale after deserialization.
        sumflow = upflow + downflow;
    }

    @Override
    public String toString() {
        return "FlowBean [upflow=" + upflow + ", downflow=" + downflow + ", sumflow=" + sumflow + "]";
    }
}
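Because write and readFields must agree on field order, a quick round-trip test catches serialization mistakes early (a sketch; the class name is ours):

FlowBeanCheck.java
package os.bigdata.provincflowcount;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class FlowBeanCheck {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(100, 200);
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buf));
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
        // Expected output: FlowBean [upflow=100, downflow=200, sumflow=300]
        System.out.println(copy);
    }
}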
JobClient.java
package os.bigdata.provincflowcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobClient {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(JobClient.class);
        job.setMapperClass(ProviceCountMapper.class);
        job.setReducerClass(ProviceCountReducer.class);
        // When the map output KV types match the reduce output KV types, these two lines can be omitted.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        // Explicitly set the partitioner class so our custom partitioner takes effect.
        job.setPartitionerClass(ProvicePartitioner.class);
        // The number of reduce tasks must match the number of partitions:
        // ProvicePartitioner returns 0-4, so the job needs five reducers, not four.
        job.setNumReduceTasks(5);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
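Assuming the classes above are packaged into a jar (the jar name and HDFS paths below are placeholders), the job is submitted in the usual way and writes five part files, one per partition:

hadoop jar provincflowcount.jar os.bigdata.provincflowcount.JobClient /flow/input /flow/output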