仅供参考:
本文演示如何在 MapReduce 的 map 与 reduce 之间传递自定义类;基础用法的详细解释请参见《使用wordcount详解mapreduce使用》。
实体类源码:
package com.sfd.vo;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
* 自定义的包装类:包装的电话号码,上限流量,下限流量,总流量;
* 该类实现了WritableComparable接口,从而是该类可以在map和reduce
* 之间序列化传递,并且可通过compareTo 方法实现排序
*
* @author sfd
*
*/
public class FlowBean implements WritableComparable<FlowBean>{
private String phone;
private long up_flow;
private long d_flow;
private long s_flow;
public FlowBean(String phone, long up_flow, long d_flow) {
super();
this.phone = phone;
this.up_flow = up_flow;
this.d_flow = d_flow;
this .s_flow=up_flow+d_flow;
}
public FlowBean() {
super();
// TODO Auto-generated constructor stub
}
public String getPhone() {
return phone;
}
public void setPhone(String phone) {
this.phone = phone;
}
public long getUp_flow() {
return up_flow;
}
public void setUp_flow(long up_flow) {
this.up_flow = up_flow;
}
public long getD_flow() {
return d_flow;
}
public void setD_flow(long d_flow) {
this.d_flow = d_flow;
}
public long getS_flow() {
return s_flow;
}
public void setS_flow(long s_flow) {
this.s_flow = s_flow;
}
public String toString() {
return phone+"\t"+up_flow+"\t"+d_flow+"\t"+s_flow;
}
/**
* 序列化传递
*/
public void write(DataOutput out) throws IOException {
out.writeUTF(phone);
out.writeLong(up_flow);
out.writeLong(d_flow);
out.writeLong(s_flow);
}
/**
* 反序列化接受
*/
public void readFields(DataInput in) throws IOException {
phone=in.readUTF();
up_flow=in.readLong();
d_flow=in.readLong();
s_flow=in.readLong();
}
/**
* 用来比较FlowBean对象的大小用来排序
*/
public int compareTo(FlowBean fb) {
//当传入map中的电话号码相同时判断
if(phone.equals(fb.phone)){
d_flow+=fb.d_flow;
s_flow+=fb.s_flow;
up_flow+=fb.up_flow;
return 0;
}
//用来倒序排列
return fb.s_flow<s_flow?-1:1;
}
}
map源码:
package com.sfd.flow;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import com.sfd.vo.FlowBean;
/**
 * Mapper that turns one tab-separated input line into a {@link FlowBean} key with a
 * {@link NullWritable} value.
 *
 * <p>The phone number is read from column 1 and the upstream/downstream traffic from
 * columns 7 and 8 (zero-based, after splitting on tab). Lines with too few columns or
 * non-numeric traffic fields are skipped and counted in the
 * {@code FlowMap / MALFORMED_RECORDS} job counter instead of failing the task.
 *
 * @author sfd
 */
public class FlowMap extends Mapper<LongWritable,Text,FlowBean, NullWritable>{
    private static final int PHONE_INDEX = 1;
    private static final int UP_FLOW_INDEX = 7;
    private static final int DOWN_FLOW_INDEX = 8;
    private static final String COUNTER_GROUP = "FlowMap";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /**
     * Parses one input line and emits the corresponding {@link FlowBean}.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value,Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] status = StringUtils.split(line, "\t");
        // A record needs at least DOWN_FLOW_INDEX + 1 columns to be usable.
        if (status == null || status.length <= DOWN_FLOW_INDEX) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }
        String phone = status[PHONE_INDEX].trim();
        long up_flow;
        long d_flow;
        try {
            // Trim like the phone field so stray spaces do not break parsing.
            up_flow = Long.parseLong(status[UP_FLOW_INDEX].trim());
            d_flow = Long.parseLong(status[DOWN_FLOW_INDEX].trim());
        } catch (NumberFormatException e) {
            // Skip the bad record rather than abort the whole task; the counter keeps it visible.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }
        context.write(new FlowBean(phone, up_flow, d_flow), NullWritable.get());
    }
}
reduce源码:
package com.sfd.flow;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import com.sfd.vo.FlowBean;
/**
 * Reducer that passes each {@link FlowBean} key group through unchanged.
 *
 * <p>The key handed to {@code reduce} is written exactly once per call, paired with
 * the {@link NullWritable} singleton; the grouped values are not iterated.
 *
 * @author sfd
 */
public class FlowReduce extends Reducer<FlowBean, NullWritable, FlowBean, NullWritable>{
    /**
     * Emits the group key once.
     *
     * @param fb      key of the current group
     * @param values  values grouped under the key (not read)
     * @param context task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(FlowBean fb, Iterable<NullWritable> values,Context context)
            throws IOException, InterruptedException {
        final NullWritable noValue = NullWritable.get();
        context.write(fb, noValue);
    }
}
作业的调度类:
package com.sfd.flow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.sfd.vo.FlowBean;
/**
* 用来描述一个特定的作业,(标准形式)
* @author sfd
*
*/
public class FlowRunner extends Configured implements Tool{
public int run(String[] args) throws Exception {
Configuration conf=new Configuration();
Job flowJob=Job.getInstance(conf);
flowJob.setJarByClass(FlowRunner.class);
flowJob.setMapperClass(FlowMap.class);
flowJob.setReducerClass(FlowReduce.class);
flowJob.setOutputKeyClass(FlowBean.class);
flowJob.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(flowJob, new Path(args[0]));
FileOutputFormat.setOutputPath(flowJob, new Path(args[1]));
//运行错误时 返回值为0 错误退出
//运行正确时 返回值为1 正确退出
return flowJob.waitForCompletion(true)?0:1;
}
public static void main(String[] args) throws Exception{
int result=ToolRunner.run(new Configuration(), new FlowRunner(), args);
System.exit(result);
}
}