Partitioner 分区

最新推荐文章于 2024-06-12 09:33:14 发布

chenyuanshengboke

最新推荐文章于 2024-06-12 09:33:14 发布

阅读量485

点赞数

使用自定义partitioner来处理手机上网日志信息

为什么要使用分区？

1. 根据业务需要，产生多个输出文件
2. 多个reduce任务并行运行，提高整体job的运行效率

复制代码
1 package partitioner;
2
3 import java.io.DataInput;
4 import java.io.DataOutput;
5 import java.io.IOException;
6
7 import org.apache.hadoop.conf.Configuration;
8 import org.apache.hadoop.fs.Path;
9 import org.apache.hadoop.io.LongWritable;
10 import org.apache.hadoop.io.Text;
11 import org.apache.hadoop.io.Writable;
12 import org.apache.hadoop.mapreduce.Job;
13 import org.apache.hadoop.mapreduce.Mapper;
14 import org.apache.hadoop.mapreduce.Partitioner;
15 import org.apache.hadoop.mapreduce.Reducer;
16 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
17 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
18 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
19 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
20 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
21 /**
22 * 分区必须打包jar运行
23 *
24 /
25 public class KpiApp {
26 static final String INPUT_PATH = “hdfs://chaoren:9000/wlan”;//wlan是个文件夹，日志文件放在/wlan目录下
27 static final String OUT_PATH = “hdfs://chaoren:9000/out”;
28
29 public static void main(String[] args) throws Exception {
30 final Job job = new Job(new Configuration(),
31 KpiApp.class.getSimpleName());
32
33 //打包运行
34 job.setJarByClass(KpiApp.class);
35
36 // 1.1 指定输入文件路径
37 FileInputFormat.setInputPaths(job, INPUT_PATH);
38 // 指定哪个类用来格式化输入文件
39 job.setInputFormatClass(TextInputFormat.class);
40
41 // 1.2指定自定义的Mapper类
42 job.setMapperClass(MyMapper.class);
43 // 指定输出<k2,v2>的类型
44 job.setMapOutputKeyClass(Text.class);
45 job.setMapOutputValueClass(KpiWritable.class);
46
47 // 1.3 指定分区类
48 job.setPartitionerClass(KpiPartitioner.class);
49 job.setNumReduceTasks(2);//分成两个区
50
51 // 1.4 TODO 排序、分区
52
53 // 1.5 TODO （可选）归约
54
55 // 2.2 指定自定义的reduce类
56 job.setReducerClass(MyReducer.class);
57 // 指定输出<k3,v3>的类型
58 job.setOutputKeyClass(Text.class);
59 job.setOutputValueClass(KpiWritable.class);
60
61 // 2.3 指定输出到哪里
62 FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
63 // 设定输出文件的格式化类
64 job.setOutputFormatClass(TextOutputFormat.class);
65
66 // 把代码提交给JobTracker执行
67 job.waitForCompletion(true);
68 }
69
70 static class KpiPartitioner extends HashPartitioner<Text, KpiWritable>{
71
72 @Override
73 public int getPartition(Text key, KpiWritable value, int numReduceTasks) {
74 return (key.toString().length() == 11) ? 0 : 1;//0代表的是手机号码 1代表非手机号码
75 }
76
77 }
78
79 static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
80 protected void map(
81 LongWritable key,
82 Text value,
83 org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, KpiWritable>.Context context)
84 throws IOException, InterruptedException {
85 final String[] splited = value.toString().split("\t");
86 final String msisdn = splited[1];
87 final Text k2 = new Text(msisdn);
88 final KpiWritable v2 = new KpiWritable(splited[6], splited[7],
89 splited[8], splited[9]);
90 context.write(k2, v2);
91 };
92 }
93
94 static class MyReducer extends
95 Reducer<Text, KpiWritable, Text, KpiWritable> {
96 /*
97 * @param k2
98 * 表示整个文件中不同的手机号码
99 * @param v2s
100 * 表示该手机号在不同时段的流量的集合
101 */
102 protected void reduce(
103 Text k2,
104 java.lang.Iterable v2s,
105 org.apache.hadoop.mapreduce.Reducer<Text, KpiWritable, Text, KpiWritable>.Context context)
106 throws IOException, InterruptedException {
107 long upPackNum = 0L;
108 long downPackNum = 0L;
109 long upPayLoad = 0L;
110 long downPayLoad = 0L;
111
112 for (KpiWritable kpiWritable : v2s) {
113 upPackNum += kpiWritable.upPackNum;
114 downPackNum += kpiWritable.downPackNum;
115 upPayLoad += kpiWritable.upPayLoad;
116 downPayLoad += kpiWritable.downPayLoad;
117 }
118
119 final KpiWritable v3 = new KpiWritable(upPackNum + “”, downPackNum
120 + “”, upPayLoad + “”, downPayLoad + “”);
121 context.write(k2, v3);
122 };
123 }
124 }
125
126 class KpiWritable implements Writable {
127 long upPackNum;
128 long downPackNum;
129 long upPayLoad;
130 long downPayLoad;
131
132 public KpiWritable() {
133 }
134
135 public KpiWritable(String upPackNum, String downPackNum, String upPayLoad,
136 String downPayLoad) {
137 this.upPackNum = Long.parseLong(upPackNum);
138 this.downPackNum = Long.parseLong(downPackNum);
139 this.upPayLoad = Long.parseLong(upPayLoad);
140 this.downPayLoad = Long.parseLong(downPayLoad);
141 }
142
143 public void readFields(DataInput in) throws IOException {
144 this.upPackNum = in.readLong();
145 this.downPackNum = in.readLong();
146 this.upPayLoad = in.readLong();
147 this.downPayLoad = in.readLong();
148 }
149
150 public void write(DataOutput out) throws IOException {
151 out.writeLong(upPackNum);
152 out.writeLong(downPackNum);
153 out.writeLong(upPayLoad);
154 out.writeLong(downPayLoad);
155 }
156
157 @Override
158 public String toString() {
159 return upPackNum + “\t” + downPackNum + “\t” + upPayLoad + “\t”
160 + downPayLoad;
161 }
162 }