以每行数据的第二个字段(手机号)作为 Key;Value 封装倒数第三个字段(上行流量)、倒数第二个字段(下行流量)以及二者之和(总流量)
封装Hadoop特有的序列化对象(必须实现readFields和write方法)
package com.homework;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Hadoop-serializable flow record: upstream bytes, downstream bytes and their sum.
 * A Writable value type must expose a public no-arg constructor and keep the
 * field order of {@link #write(DataOutput)} and {@link #readFields(DataInput)}
 * exactly in sync.
 */
public class WritableObject implements Writable {

    // Primitive longs instead of boxed Long: avoids an autobox per record in
    // readFields(), and removes the NullPointerException that write() would
    // throw if an instance built with the no-arg constructor were serialized
    // before being populated (primitives default to 0).
    private long up_flow;
    private long down_flow;
    private long total_flow;

    /** Required by Hadoop's reflection-based deserialization. */
    public WritableObject() {
    }

    /**
     * @param up_flow   upstream traffic volume
     * @param down_flow downstream traffic volume
     *                  (total_flow is derived as their sum; signature kept as
     *                  {@code Long} for drop-in compatibility with callers)
     */
    public WritableObject(Long up_flow, Long down_flow) {
        this.up_flow = up_flow;
        this.down_flow = down_flow;
        this.total_flow = this.up_flow + this.down_flow;
    }

    /** Deserializes in the same field order as {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.up_flow = in.readLong();
        this.down_flow = in.readLong();
        this.total_flow = in.readLong();
    }

    /** Serializes in the same field order as {@link #readFields(DataInput)}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(up_flow);
        out.writeLong(down_flow);
        out.writeLong(total_flow);
    }

    public Long getUp_flow() {
        return up_flow;
    }

    public Long getDown_flow() {
        return down_flow;
    }

    public Long getTotal_flow() {
        return total_flow;
    }

    /** Rendered verbatim into the job's text output by TextOutputFormat. */
    @Override
    public String toString() {
        return "up_flow:" + up_flow + ", down_flow:" + down_flow + ", total_flow:" + total_flow;
    }
}
Mapper阶段
package com.homework;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Map phase: emits (phone number, flow record) for each tab-separated input line.
 * The phone number is the second column; up/down flow are the third-from-last
 * and second-from-last columns respectively.
 */
public class Phone_Mapper extends Mapper<LongWritable, Text, Text, WritableObject> {

    // Reused across map() calls: Hadoop serializes the pair inside
    // context.write(), so allocating a fresh Text per record is pure garbage.
    private final Text phoneKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] words = value.toString().split("\t");

        // Guard: need at least 3 columns so that words[1] and
        // words[words.length - 3] are both in range. A single malformed line
        // should not fail the whole task attempt.
        if (words.length < 3) {
            return;
        }

        String phone_num = words[1];
        try {
            long up_flow = Long.parseLong(words[words.length - 3]);
            long down_flow = Long.parseLong(words[words.length - 2]);
            phoneKey.set(phone_num);
            context.write(phoneKey, new WritableObject(up_flow, down_flow));
        } catch (NumberFormatException ignored) {
            // Non-numeric flow field: drop the record rather than crash the job.
        }
    }
}
Reducer阶段
package com.homework;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reduce phase: sums the up/down flow of every record sharing one phone number
 * and emits a single WritableObject (whose constructor derives the total).
 */
public class Phone_Reducer extends Reducer<Text, WritableObject, Text, WritableObject> {

    @Override
    protected void reduce(Text key, Iterable<WritableObject> values, Context context)
            throws IOException, InterruptedException {
        // Primitive accumulators: the original boxed `Long += ...` pattern
        // allocated a new Long object on every iteration.
        long upTotal = 0L;
        long downTotal = 0L;
        for (WritableObject flow : values) {
            upTotal += flow.getUp_flow();
            downTotal += flow.getDown_flow();
        }
        context.write(key, new WritableObject(upTotal, downTotal));
    }
}
Driver阶段
package com.homework;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Driver: configures and submits the flow-summing MapReduce job.
 * Usage: Phone_Driver [inputPath [outputPath]] — both optional; defaults
 * preserve the original hard-coded HDFS locations.
 */
public class Phone_Driver {

    // Single source of truth for the NameNode address (was repeated in three
    // string literals).
    private static final String HDFS_URI = "hdfs://hadoop01:9000";
    private static final String DEFAULT_INPUT = HDFS_URI + "/china_move.txt";
    private static final String DEFAULT_OUTPUT = HDFS_URI + "/out";

    public static void main(String[] args)
            throws URISyntaxException, IOException, InterruptedException, ClassNotFoundException {
        System.setProperty("HADOOP_USER_NAME", "root"); // operate on HDFS as root
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI); // which cluster to submit the job to

        // Optional CLI overrides; no-arg invocation behaves exactly as before.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // newInstance + try-with-resources: the original leaked the handle, and
        // closing a cached FileSystem.get() instance could break the job
        // submission code that fetches the same cached instance later.
        try (FileSystem fs = FileSystem.newInstance(new URI(HDFS_URI), configuration, "root")) {
            Path out = new Path(outputPath);
            // MapReduce refuses to run if the output directory already exists.
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(Phone_Driver.class);

        // Mapper / Reducer classes.
        job.setMapperClass(Phone_Mapper.class);
        job.setReducerClass(Phone_Reducer.class);

        // Map-output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(WritableObject.class);

        // Final (reduce) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(WritableObject.class);

        // Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Submit, wait, and report success via the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
运行Driver就得到结果