数据类型:
1363157984041 13660577991(手机号) 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 24 9 6960(上行流量) 690(下行流量) 200
需求:
求每个手机号的上行流量总和、下行流量总和以及总流量,以自定义 Bean 作为 value 传输,并按照手机号开头三位进行分区
统计类:
package mrpro927;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
*需求1:同一个手机号上行,下行,总流量之和
*
*/
/**
 * MapReduce driver: for each phone number, sums up-flow, down-flow and total
 * traffic, carried in a custom {@link phoneBean} value, and partitions output
 * by the first three digits of the phone number.
 */
public class phoneDataBeans {
    /**
     * Mapper: parses one tab-separated log line per call and emits
     * (phone number, phoneBean). Malformed records (field count != 11)
     * are silently dropped.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, phoneBean>{
        // Reused across map() calls to avoid per-record allocation (standard Hadoop idiom;
        // context.write serializes immediately, so reuse is safe).
        Text t = new Text();
        phoneBean p = new phoneBean();
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, phoneBean>.Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            // Data cleaning: keep only well-formed 11-field records.
            if(split.length == 11){
                t.set(split[1]);                            // field 1: phone number
                p.setUpflow(Integer.parseInt(split[7]));    // field 7: up-flow
                p.setDownflow(Integer.parseInt(split[8]));  // field 8: down-flow
                p.setSum(p.getUpflow() + p.getDownflow());
                context.write(t, p);
            }
        }
    }
    /**
     * Reducer: called once per phone number; accumulates up-flow, down-flow
     * and total traffic over all records of that number.
     */
    public static class MyReducer extends Reducer<Text, phoneBean, Text, phoneBean>{
        // Reused output bean (same allocation-avoidance idiom as the mapper).
        phoneBean p = new phoneBean();
        @Override
        protected void reduce(Text key, Iterable<phoneBean> values,
                Reducer<Text, phoneBean, Text, phoneBean>.Context context)
                throws IOException, InterruptedException {
            int upsum = 0;
            int downsum = 0;
            int sum = 0;
            for(phoneBean t : values){
                upsum += t.getUpflow();
                downsum += t.getDownflow();
                sum += t.getSum();
            }
            p.setUpflow(upsum);
            p.setDownflow(downsum);
            p.setSum(sum);
            context.write(key, p);
        }
    }
    /**
     * Partitioner: routes records by phone-number prefix.
     * 136 -> 0, 137 -> 1, 138 -> 2, 139 -> 3, everything else -> 4.
     * Requires the job to run with 5 reduce tasks.
     */
    public static class MyPartitioner extends Partitioner<Text, phoneBean>{
        @Override
        public int getPartition(Text key, phoneBean value, int numPartitions) {
            // startsWith avoids StringIndexOutOfBoundsException on keys
            // shorter than 3 characters (substring(0, 3) would throw).
            String phone = key.toString();
            if(phone.startsWith("136")){
                return 0;
            }else if(phone.startsWith("137")){
                return 1;
            }else if(phone.startsWith("138")){
                return 2;
            }else if(phone.startsWith("139")){
                return 3;
            }else {
                return 4;
            }
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Load configuration (core-site.xml etc. from the classpath).
        Configuration conf = new Configuration();
        // Run as this Linux user when launched from eclipse.
        System.setProperty("HADOOP_USER_NAME", "mading");
        // Create the job.
        Job job = Job.getInstance(conf);
        // Main class of this job (locates the jar at submission time).
        job.setJarByClass(phoneDataBeans.class);
        // Mapper and reducer classes.
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // Map output key/value types (may be omitted only when identical
        // to the reduce output types).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(phoneBean.class);
        // Reduce output key/value types.
        job.setOutputKeyClass(Text.class);
        // BUG FIX: was Text.class, but MyReducer emits phoneBean values;
        // the declared type must match or the job fails at serialization.
        job.setOutputValueClass(phoneBean.class);
        // Custom partitioning by phone-number prefix.
        job.setPartitionerClass(MyPartitioner.class);
        // Reduce parallelism: must be 5 to match MyPartitioner's 5 partitions.
        job.setNumReduceTasks(5);
        // Input path on the HA HDFS cluster.
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/phonedatain"));
        // Output path (must not already exist).
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/pout01"));
        // Submit and wait for completion.
        job.waitForCompletion(true);
    }
}
bean类,注意实现Writable接口
因为没有实现WritableComparable接口(即没有重写compareTo排序方法),所以这种自定义类只能放在value的位置,不能放在key的位置
package mrpro927;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/*
* 自定义的类作为MapReduce传输对象的时候,必须序列化,实现Writable 接口
*/
/**
 * Value bean carrying one phone number's traffic figures: up-flow,
 * down-flow and their total. Implements {@link Writable} so Hadoop can
 * serialize it between the map and reduce phases.
 *
 * <p>Note: this class does not implement WritableComparable, so it may
 * only be used in the value position, never as a key.
 */
public class phoneBean implements Writable{

    private int upflow;    // upstream traffic
    private int downflow;  // downstream traffic
    private int sum;       // upflow + downflow, set by the caller

    public int getUpflow() {
        return this.upflow;
    }

    public int getDownflow() {
        return this.downflow;
    }

    public int getSum() {
        return this.sum;
    }

    public void setUpflow(int upflow) {
        this.upflow = upflow;
    }

    public void setDownflow(int downflow) {
        this.downflow = downflow;
    }

    public void setSum(int sum) {
        this.sum = sum;
    }

    /** Tab-separated rendering used verbatim as the reduce output value. */
    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sum ;
    }

    /**
     * Serialization (object -> bytes), invoked when the map output is
     * shipped to the reduce side.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upflow);
        out.writeInt(downflow);
        out.writeInt(sum);
    }

    /**
     * Deserialization (bytes -> object) on the reduce side. Field order
     * must exactly mirror {@link #write(DataOutput)}.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upflow = in.readInt();
        this.downflow = in.readInt();
        this.sum = in.readInt();
    }
}