学习了几个星期,终于把hadoop的"helloworld"-----wordcount
跑通了..不仅如此,还跑通了wordcount topN ...
下面给大家分享一下代码,并附上解释..
思路是进行了2次mapreduce.
写了2个mapper 2个reducer 和1个combiner
第一次mapreduce的输出数据示例如下(每行格式为"单词<TAB>出现次数"):
A 141
AB 1
ABC 368
ACKNOWLEDGMENTS 1
ACT 2
...
把第一次的输出当成第二次mapreduce的输入.
第一次的mapreduce比较简单(就是普通的wordcount),这里直接贴第二次mapreduce的代码
//mapreduce2 start
public static class Map2 extends Mapper {
// mapper 2start
public void
map(LongWritable key, Text value, Context context) throws
IOException, InterruptedException {
//for debug
System.out.println("enter
the mapper2");
String line = value.toString();
String
values=line.split("\t")[0]+"|"+line.split("\t")[1];
context.write(new Text(" "),new
Text(values));
// for debug
System.out.println("exit
the mapper2");
}
}//mapper2 end
//combiner start
public static class Combine extends Reducer {
ArrayList
top10Array= new ArrayList();
public void
reduce (Text key, Iterable values, Context
context)
throws
IOException, InterruptedException {
// for debug
System.out.println("enter
into combiner");
//arraylist 赋值
while(top10Array.size()<10)
{
top10Array.add(values.iterator().next().toString());
}
while(values.iterator().hasNext())
{
MySort(top10Array);
String
currentValues=values.iterator().next().toString();
Integer
currentnum=Integer.parseInt(currentValues.split("\\|")[1]);
for(int
i=0;i<10;i++)
{
Integer numofArray =
Integer.parseInt(top10Array.get(i).split("\\|")[1]);
if(currentnum
> numofArray)
{
top10Array.remove(0);
top10Array.add(currentValues);
break;
}
}// for
end
}//
while end
//
String combinerOutput =
"";
// for(int i=0; i
< 5; i++){
// combinerOutput =
combinerOutput+top5Array.get(i)+"\n ";
// }
MySort(top10Array);
context.write(new Text("
"), new Text(top10Array.get(0)));
context.write(new Text("
"), new Text(top10Array.get(1)));
context.write(new Text("
"), new Text(top10Array.get(2)));
context.write(new Text("
"), new Text(top10Array.get(3)));
context.write(new Text("
"), new Text(top10Array.get(4)));
context.write(new Text(" "), new
Text(top10Array.get(5)));
context.write(new Text("
"), new Text(top10Array.get(6)));
context.write(new Text("
"), new Text(top10Array.get(7)));
context.write(new Text("
"), new Text(top10Array.get(8)));
context.write(new Text("
"), new Text(top10Array.get(9)));
//for debug
System.out.println("exit
the combiner");
}
//MySort start
public void MySort(ArrayList
array)
throws IOException, InterruptedException{
int i,j;
for(i=0;i<9;i++)
{
for(j=9;j>i;j--)
{
Integer
latterOfArray=Integer.parseInt(array.get(j).toString().split("\\|")[1]);
Integer
fommerOfArray=Integer.parseInt(array.get(j-1).toString().split("\\|")[1]);
if(latterOfArray
{
Collections.swap(array, j, j-1);
}
}// for
2
}//for
1
}// MySort end
} // Combine end
//reduce2 start
public static class Reduce2 extends Reducer {
ArrayList top5Array= new
ArrayList();
public void reduce (Text key,
Iterable values, Context context)
throws
IOException, InterruptedException {
// for debug
System.out.println("enter
into reducer2");
//arraylist 赋值
while(top5Array.size()<5)
{
top5Array.add(values.iterator().next().toString());
}
MySort(top5Array);
while(values.iterator().hasNext())
{
String
currentValues=values.iterator().next().toString();
Integer
currentnum=Integer.parseInt(currentValues.split("\\|")[1]);
for(int
i=0;i<5;i++)
{
Integer numofArray =
Integer.parseInt(top5Array.get(i).split("\\|")[1]);
if(currentnum
> numofArray)
{
top5Array.remove(0);
top5Array.add(currentValues);
break;
}
}// for
end
}//
while end
String reducer2Output =
"top5:\n";
for(int i=0; i
< 5; i++){
reducer2Output
=reducer2Output+top5Array.get(i)+"\n ";
}
context.write(new Text(""),
new Text(reducer2Output));
//for debug
System.out.println("exit
the reducer");
}
//MySort start
public
void MySort(ArrayList array)
throws
IOException, InterruptedException{
int
i,j;
for(i=0;i<4;i++)
{
for(j=4;j>i;j--)
{
Integer
latterOfArray=Integer.parseInt(array.get(j).toString().split("\\|")[1]);
Integer
fommerOfArray=Integer.parseInt(array.get(j-1).toString().split("\\|")[1]);
if(latterOfArray
{
Collections.swap(array, j, j-1);
}
}// for 2
}//for 1
}//
MySort end
} // reducer2 end
//mapreduce2 end
代码注释得很详细了..希望对大家有点用