基于统计移动设备上网流量例子:
MyMapper
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MyMapper extends Mapper<LongWritable, Text, Text, PhoneWritable> {
    /**
     * Parses one tab-separated log line and emits (phone, traffic counters).
     *
     * BUG FIX: the original wrapped the body in {@code for (String string : splits)}
     * while reading only fixed indices, so the same record was written once per
     * column of the line — inflating every aggregated total by the column count.
     * One input line must produce exactly one output record.
     */
    protected void map(LongWritable key, Text value,
            org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, PhoneWritable>.Context context)
            throws java.io.IOException, InterruptedException {
        String[] splits = value.toString().split("\t");
        // Skip malformed lines that do not carry all the columns we read below.
        if (splits.length <= 24) {
            return;
        }
        String phone = splits[2];        // subscriber phone number
        String upPackNum = splits[21];   // upstream packet count
        String downPackNum = splits[22]; // downstream packet count
        String upPayLoad = splits[23];   // upstream bytes
        String downPayLoad = splits[24]; // downstream bytes
        context.write(new Text(phone), new PhoneWritable(upPackNum, downPackNum, upPayLoad, downPayLoad));
    }
}
MyReduce
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MyReduce extends Reducer<Text, PhoneWritable, Text, PhoneWritable> {
    /**
     * Sums the four traffic counters across every record that shares one phone
     * number and emits a single aggregated {@link PhoneWritable} per key.
     */
    protected void reduce(Text key2, Iterable<PhoneWritable> values2,
            org.apache.hadoop.mapreduce.Reducer<Text, PhoneWritable, Text, PhoneWritable>.Context context)
            throws java.io.IOException, InterruptedException {
        long totalUpPackets = 0L;
        long totalDownPackets = 0L;
        long totalUpBytes = 0L;
        long totalDownBytes = 0L;
        for (PhoneWritable record : values2) {
            totalUpPackets += record.upPackNum;
            totalDownPackets += record.downPackNum;
            totalUpBytes += record.upPayLoad;
            totalDownBytes += record.downPayLoad;
        }
        // PhoneWritable's constructor takes the counters in string form.
        context.write(key2, new PhoneWritable(
                String.valueOf(totalUpPackets),
                String.valueOf(totalDownPackets),
                String.valueOf(totalUpBytes),
                String.valueOf(totalDownBytes)));
    }
}
PhoneWritable 自定义hadoop类型
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * Custom Hadoop value type holding four traffic counters for one phone number.
 * Fields are package-private because the mapper/reducer in this example read
 * them directly; their names must therefore stay unchanged.
 */
public class PhoneWritable implements Writable {
    // FIX: primitive long instead of boxed Long — avoids autoboxing on every
    // (de)serialization and removes any chance of an NPE inside write().
    long upPackNum = 0L;
    long downPackNum = 0L;
    long upPayLoad = 0L;
    long downPayLoad = 0L;

    /**
     * The no-arg constructor must not be removed: Hadoop instantiates
     * Writables reflectively during deserialization.
     */
    public PhoneWritable() {
    }

    /**
     * Builds the writable from the raw string columns of a log line.
     *
     * @throws NumberFormatException if any column is not a valid long
     */
    public PhoneWritable(String upPackNum, String downPackNum, String upPayLoad, String downPayLoad) {
        this.upPackNum = Long.parseLong(upPackNum);
        this.downPackNum = Long.parseLong(downPackNum);
        this.upPayLoad = Long.parseLong(upPayLoad);
        this.downPayLoad = Long.parseLong(downPayLoad);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Must read the fields in exactly the order write() emits them.
        this.upPackNum = in.readLong();
        this.downPackNum = in.readLong();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upPackNum);
        out.writeLong(downPackNum);
        out.writeLong(upPayLoad);
        out.writeLong(downPayLoad);
    }

    /**
     * Overridden to format the value portion of each output-file line.
     * FIX: the class name was misspelled "PhonWritable" in the original output.
     */
    @Override
    public String toString() {
        return "PhoneWritable [upPackNum=" + upPackNum + ", downPackNum=" + downPackNum + ", upPayLoad=" + upPayLoad
                + ", downPayLoad=" + downPayLoad + "]";
    }
}
PhonePartition(自定义分区类,需要继承HashPartitioner;后面的泛型写的是key2、value2的类型,因为分区发生在1.3步骤,而执行map函数并输出是在1.2步骤进行)
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
/**
* 自定义分区规则
* 返回值就将它类似的看成一个区分不同分组的标示
* 比如这里手机号返回的是0
* 非手机号返回的是1
*/
/**
 * Custom partitioning rule: records whose key is an 11-character string (a
 * mobile phone number) go to partition 0; every other key goes to partition 1.
 * The generic parameters match the map-output key/value types because
 * partitioning runs between the map and reduce phases.
 */
public class PhonePartition extends HashPartitioner<Text, PhoneWritable> {
    // Mobile phone numbers in this data set are exactly 11 digits long.
    private static final int PHONE_NUMBER_LENGTH = 11;

    @Override
    public int getPartition(Text key, PhoneWritable value, int numReduceTasks) {
        if (key.toString().length() == PHONE_NUMBER_LENGTH) {
            return 0; // mobile phone number
        }
        return 1; // anything else
    }
}
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
* 分区的例子必须打包成jar包才能运行
*/
/**
 * Driver for the phone-traffic statistics job. The example must be packaged
 * as a jar to run, because it ships a custom partitioner and uses two reducers.
 */
public class TestPhoneLog {
    private static final String INPUT_PATH = "hdfs://xxc:9000/input";
    private static final String OUT_PATH = "hdfs://xxc:9000/out";

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
        Configuration conf = new Configuration();
        // Delete a stale output directory so the job can be re-run.
        FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        Path outPath = new Path(OUT_PATH);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        // FIX: Job.getInstance(...) replaces the deprecated new Job(conf, name).
        Job job = Job.getInstance(conf, TestPhoneLog.class.getSimpleName());
        job.setJarByClass(TestPhoneLog.class); // required when running from a jar
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PhoneWritable.class);
        job.setPartitionerClass(PhonePartition.class);
        // Two reduce tasks: partition 0 holds phone numbers, partition 1 the rest.
        job.setNumReduceTasks(2);
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(PhoneWritable.class);
        FileOutputFormat.setOutputPath(job, outPath);
        job.setOutputFormatClass(TextOutputFormat.class);
        // FIX: propagate job success/failure instead of silently discarding it.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
最后的输出主要是part开头的两个文件