public int getPartition(IntWritable key, IntWritable value, int numPartitions)
What is this numPartitions related to, and how is it set? The Job never configures it anywhere (there is no setNumReduceTasks call), so what is the default value of numPartitions?
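To answer this up front: numPartitions is the job's reduce task count (mapreduce.job.reduces), which defaults to 1 when the driver never calls setNumReduceTasks(). Moreover, with a single reducer the Hadoop 2.x map task short-circuits the partitioner entirely and sends every record to partition 0, so a custom partitioner only takes effect once the reducer count is raised. A minimal sketch of the driver-side setting (the job variable is the one built in the full listing below):

    job.setNumReduceTasks(3); // getPartition() is now invoked with numPartitions == 3

So for the MyPartition class below to actually be called, the driver would need a call like this.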
The complete code for the example is pasted below:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

/**
 * Created by dell on 2017/9/25.
 * @author w
 */
public class MySort {
    static final String INPUT_PATH = "hdfs://hadoopwang0:9000/test";
    static final String OUT_PATH = "hdfs://hadoopwang0:9000/testout";

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
//        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
//        if (otherArgs.length != 2) {
//            System.err.println("Usage: wordcount <in> <out>");
//            System.exit(2);
//        }
        Job job = new Job(conf, "MySort");
        job.setJarByClass(MySort.class);
        job.setMapperClass(MyMap.class);
        job.setReducerClass(MyReduce.class);
        job.setPartitionerClass(MyPartition.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Map: parse each input line as an integer and emit it as the output key,
    // with a constant 1 as the value; the shuffle then sorts the keys.
    public static class MyMap extends Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }

    // Reduce: copy the input key to the output value, writing it once per element
    // of the value list so that duplicates are preserved. The counter linenum
    // records each key's rank; note it restarts at 1 in every reduce partition.
    public static class MyReduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            System.out.println("Reducer:" + key);
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }

    // Custom partitioner: derive a bucket width from the known maximum input value
    // and the number of partitions, then return the partition whose range
    // [bound * i, bound * (i + 1)) contains the key.
    public static class MyPartition extends Partitioner<IntWritable, IntWritable> {
        @Override
        public int getPartition(IntWritable key, IntWritable value, int numPartitions) {
            int maxNumber = 6522;
            int bound = maxNumber / numPartitions + 1;
            int keyNumber = key.get();
            for (int i = 0; i < numPartitions; i++) {
                if (keyNumber >= bound * i && keyNumber < bound * (i + 1)) {
                    return i;
                }
            }
            // A partition number must lie in [0, numPartitions); returning -1
            // would crash the shuffle, so clamp out-of-range keys to the last bucket.
            return numPartitions - 1;
        }
    }
}
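To see where the bucket boundaries fall, MyPartition can also be exercised outside the MapReduce framework. A minimal sketch, assuming MySort is on the classpath and in the same package (the class name PartitionCheck is made up for illustration):

import org.apache.hadoop.io.IntWritable;

public class PartitionCheck {
    public static void main(String[] args) {
        MySort.MyPartition p = new MySort.MyPartition();
        int numPartitions = 3; // bound = 6522 / 3 + 1 = 2175
        // Probe keys on both sides of each bucket boundary.
        for (int k : new int[]{0, 2174, 2175, 4349, 4350, 6522}) {
            System.out.println(k + " -> partition "
                    + p.getPartition(new IntWritable(k), new IntWritable(1), numPartitions));
        }
    }
}

With three partitions the bucket width is 6522 / 3 + 1 = 2175, so keys 0 to 2174 land in partition 0, 2175 to 4349 in partition 1, and 4350 to 6522 in partition 2. Because each reducer receives a contiguous range of keys, concatenating the output files in partition order yields a globally sorted result.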