Without further ado, let's get straight to it.
Sample data: the original sample file is not reproduced here. The code below assumes tab-separated lines in which field 1 (0-based) is the phone number and fields 7 and 8 are the upstream and downstream traffic.
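A line in that shape might look like the following (every value here is made up purely to illustrate the layout, not taken from the real file):

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	24	27	2481	24681	200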
Running the program on Hadoop:
After writing the code, package it into a jar and copy it to the virtual machine.
Run it with:
hadoop jar <path to the jar> <class containing main> <input path> <output path>
For this program the command is:
hadoop jar /home/hadoop/wordcount.jar com.lmy.flowsum.SortMR /data/china_mobile.dat /data/wordcount1
This computes per-phone traffic statistics over the sample.
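Note that the output directory must not already exist, or the job fails at submission. Once the job completes, you can inspect the result straight from HDFS (the path matches the command above; with the default single reducer the result lands in part-r-00000):

hadoop fs -cat /data/wordcount1/part-r-00000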
Unsorted version first. Start by declaring a bean class that Hadoop can serialize and compare:
package com.lmy.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    private String phoneNumber;
    private long upflow;
    private long downflow;
    private long sumflow;

    /**
     * No-arg constructor. Hadoop needs this so it can instantiate
     * the bean reflectively before calling readFields().
     */
    public FlowBean() {
    }

    /**
     * Parameterized constructor
     * @param phoneNumber
     * @param upflow
     * @param downflow
     */
    public FlowBean(String phoneNumber, long upflow, long downflow) {
        this.phoneNumber = phoneNumber;
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = upflow + downflow;
    }

    public String getPhoneNumber() {
        return phoneNumber;
    }

    public void setPhoneNumber(String phoneNumber) {
        this.phoneNumber = phoneNumber;
    }

    public long getUpflow() {
        return upflow;
    }

    public void setUpflow(long upflow) {
        this.upflow = upflow;
    }

    public long getDownflow() {
        return downflow;
    }

    public void setDownflow(long downflow) {
        this.downflow = downflow;
    }

    public long getSumflow() {
        return sumflow;
    }

    public void setSumflow(long sumflow) {
        this.sumflow = sumflow;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Deserialization: the read order must exactly match the write order below
        phoneNumber = in.readUTF();
        upflow = in.readLong();
        downflow = in.readLong();
        sumflow = in.readLong();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialization: called by the framework whenever the bean crosses the shuffle
        out.writeUTF(phoneNumber);
        out.writeLong(upflow);
        out.writeLong(downflow);
        out.writeLong(sumflow);
    }

    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sumflow;
    }

    @Override
    public int compareTo(FlowBean o) {
        // Sort by total traffic, descending
        if (sumflow != o.getSumflow()) {
            return sumflow > o.getSumflow() ? -1 : 1;
        }
        // Tie-break on the phone number so compareTo() honors its contract and
        // beans with equal traffic but different phones aren't treated as equal keys
        return phoneNumber.compareTo(o.getPhoneNumber());
    }
}
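Before wiring the bean into a job, it is worth sanity-checking that write() and readFields() agree on field order, since Hadoop relies on that whenever the bean crosses the shuffle. A minimal local round-trip sketch (the class name and sample values are mine, not from the original post):

package com.lmy.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class FlowBeanRoundTripTest {
    public static void main(String[] args) throws Exception {
        FlowBean original = new FlowBean("13726230503", 2481, 24681);

        // Serialize the same way the MapReduce framework does
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh bean built via the no-arg constructor
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Expect: 13726230503	2481	24681	27162
        System.out.println(copy.getPhoneNumber() + "\t" + copy);
    }
}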
Next, the map and reduce programs. First the mapper:
package com.lmy.flowsum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line is one log record, tab-separated
        String line = value.toString();
        String[] fields = StringUtils.split(line, "\t");
        // Field 1 is the phone number; fields 7 and 8 are the up/down traffic
        String phoneNumber = fields[1];
        long upflow = Long.parseLong(fields[7]);
        long downflow = Long.parseLong(fields[8]);
        context.write(new Text(phoneNumber), new FlowBean(phoneNumber, upflow, downflow));
    }
}
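Real log files often contain short or malformed lines that would crash the task with an ArrayIndexOutOfBoundsException or NumberFormatException. A defensive variant of the map body (a sketch, assuming the same field layout) simply skips such records:

// Inside map(): guard against malformed records before parsing
String[] fields = StringUtils.split(value.toString(), "\t");
if (fields == null || fields.length < 9) {
    return; // line empty or too short, skip it
}
try {
    long upflow = Long.parseLong(fields[7]);
    long downflow = Long.parseLong(fields[8]);
    context.write(new Text(fields[1]), new FlowBean(fields[1], upflow, downflow));
} catch (NumberFormatException e) {
    // non-numeric traffic field, skip this record
}

Then the reducer, which adds up every record that shares a phone number: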
package com.lmy.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumReduce extends Reducer<Text, FlowBean, Text, FlowBean> {

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        long upflowCounter = 0;
        long downflowCounter = 0;
        // Accumulate the traffic of every record that shares this phone number
        for (FlowBean bean : values) {
            upflowCounter += bean.getUpflow();
            downflowCounter += bean.getDownflow();
        }
        context.write(key, new FlowBean(key.toString(), upflowCounter, downflowCounter));
    }
}
Finally, the driver program:
package com.lmy.flowsum;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowSumRunner extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration ToolRunner has already populated,
        // rather than building a fresh one that ignores command-line options
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowSumRunner.class);

        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input and output directories for the job
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new FlowSumRunner(), args);
        System.exit(res);
    }
}
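Because the reduce step is a pure sum and the map output types (Text, FlowBean) match the reduce input types, the same reducer class can optionally double as a map-side combiner to cut shuffle traffic. A one-line addition inside run() (optional, not in the original code):

// Optional: pre-aggregate per-phone traffic on the map side
job.setCombinerClass(FlowSumReduce.class);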
These programs need the Hadoop jars on the classpath; either add them to the project by hand, or let Maven resolve and download them automatically.
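If you go the Maven route, a single dependency pulls in the MapReduce client API along with its transitive jars (on Hadoop 2.x that includes the commons-lang StringUtils used above); the version here is an assumption, match it to your cluster:

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <!-- assumption: pick the version your cluster actually runs -->
    <version>2.7.3</version>
</dependency>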
Sorted version, which ranks records by total traffic:
package com.lmy.flowsum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SortMR {

    public static class SortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = StringUtils.split(line, "\t");
            String phoneNumber = fields[1];
            long upflow = Long.parseLong(fields[7]);
            long downflow = Long.parseLong(fields[8]);
            // The bean itself is the key, so the shuffle sorts records
            // with FlowBean.compareTo(), i.e. by total traffic descending
            context.write(new FlowBean(phoneNumber, upflow, downflow), NullWritable.get());
        }
    }

    public static class SortReducer extends Reducer<FlowBean, NullWritable, Text, FlowBean> {

        @Override
        protected void reduce(FlowBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            // Keys arrive already sorted; just unpack the phone number for output
            String phoneNumber = key.getPhoneNumber();
            context.write(new Text(phoneNumber), key);
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortMR.class);

        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
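With the default single reduce task, part-r-00000 then holds one line per distinct bean, ordered by descending total traffic; the numbers below are purely illustrative:

13726230503	2481	24681	27162
13926435656	132	1512	1644

Two caveats: this version reads the raw log directly, so each input line becomes its own record rather than a per-phone total (to rank aggregated totals, run the unsorted job first and sort its output, adjusting the field positions); and with more than one reducer each output file is only sorted internally, so a global ranking requires a single reducer or a custom partitioner.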