java topn_wordcount topN 的代码(Java)

学习了几个星期,终于把 Hadoop 的"helloworld"——wordcount 跑通了。不仅如此,还跑通了 wordcount topN。

下面给大家分享一下代码,并附上解释。

思路是进行了2次mapreduce.

写了2个mapper 2个reducer 和1个combiner

第一次mapreduce的输出数据示例如下:

A 141

AB 1

ABC 368

ACKNOWLEDGMENTS 1

ACT 2

...

把第一次的输出当成第二次mapreduce的输入。

第一次的mapreduce比较简单,我直接贴第二次的

//mapreduce2 start

public static class Map2 extends Mapper {

// mapper 2start

public void

map(LongWritable key, Text value, Context context) throws

IOException, InterruptedException {

//for debug

System.out.println("enter

the mapper2");

String line = value.toString();

String

values=line.split("\t")[0]+"|"+line.split("\t")[1];

context.write(new Text(" "),new

Text(values));

// for debug

System.out.println("exit

the mapper2");

}

}//mapper2 end

//combiner start

public static class Combine extends Reducer {

ArrayList

top10Array= new ArrayList();

public void

reduce (Text key, Iterable values, Context

context)

throws

IOException, InterruptedException {

// for debug

System.out.println("enter

into combiner");

//arraylist 赋值

while(top10Array.size()<10)

{

top10Array.add(values.iterator().next().toString());

}

while(values.iterator().hasNext())

{

MySort(top10Array);

String

currentValues=values.iterator().next().toString();

Integer

currentnum=Integer.parseInt(currentValues.split("\\|")[1]);

for(int

i=0;i<10;i++)

{

Integer numofArray =

Integer.parseInt(top10Array.get(i).split("\\|")[1]);

if(currentnum

> numofArray)

{

top10Array.remove(0);

top10Array.add(currentValues);

break;

}

}// for

end

}//

while end

//

String combinerOutput =

"";

//    for(int i=0; i

< 5; i++){

//    combinerOutput =

combinerOutput+top5Array.get(i)+"\n ";

//    }

MySort(top10Array);

context.write(new Text("

"), new Text(top10Array.get(0)));

context.write(new Text("

"), new Text(top10Array.get(1)));

context.write(new Text("

"), new Text(top10Array.get(2)));

context.write(new Text("

"), new Text(top10Array.get(3)));

context.write(new Text("

"), new Text(top10Array.get(4)));

context.write(new Text(" "), new

Text(top10Array.get(5)));

context.write(new Text("

"), new Text(top10Array.get(6)));

context.write(new Text("

"), new Text(top10Array.get(7)));

context.write(new Text("

"), new Text(top10Array.get(8)));

context.write(new Text("

"), new Text(top10Array.get(9)));

//for debug

System.out.println("exit

the combiner");

}

//MySort start

public void  MySort(ArrayList

array)

throws IOException, InterruptedException{

int i,j;

for(i=0;i<9;i++)

{

for(j=9;j>i;j--)

{

Integer

latterOfArray=Integer.parseInt(array.get(j).toString().split("\\|")[1]);

Integer

fommerOfArray=Integer.parseInt(array.get(j-1).toString().split("\\|")[1]);

if(latterOfArray

{

Collections.swap(array, j, j-1);

}

}// for

2

}//for

1

}// MySort end

} // Combine end

//reduce2 start

public static class Reduce2 extends Reducer {

ArrayList top5Array= new

ArrayList();

public void reduce (Text key,

Iterable values, Context context)

throws

IOException, InterruptedException {

// for debug

System.out.println("enter

into reducer2");

//arraylist 赋值

while(top5Array.size()<5)

{

top5Array.add(values.iterator().next().toString());

}

MySort(top5Array);

while(values.iterator().hasNext())

{

String

currentValues=values.iterator().next().toString();

Integer

currentnum=Integer.parseInt(currentValues.split("\\|")[1]);

for(int

i=0;i<5;i++)

{

Integer numofArray =

Integer.parseInt(top5Array.get(i).split("\\|")[1]);

if(currentnum

> numofArray)

{

top5Array.remove(0);

top5Array.add(currentValues);

break;

}

}// for

end

}//

while end

String reducer2Output =

"top5:\n";

for(int i=0; i

< 5; i++){

reducer2Output

=reducer2Output+top5Array.get(i)+"\n ";

}

context.write(new Text(""),

new Text(reducer2Output));

//for debug

System.out.println("exit

the reducer");

}

//MySort start

public

void  MySort(ArrayList array)

throws

IOException, InterruptedException{

int

i,j;

for(i=0;i<4;i++)

{

for(j=4;j>i;j--)

{

Integer

latterOfArray=Integer.parseInt(array.get(j).toString().split("\\|")[1]);

Integer

fommerOfArray=Integer.parseInt(array.get(j-1).toString().split("\\|")[1]);

if(latterOfArray

{

Collections.swap(array, j, j-1);

}

}// for 2

}//for 1

}//

MySort end

} // reducer2 end

//mapreduce2 end

代码注释得很详细了,希望对大家有点用。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值