storm java 例子_流式计算之Storm_wordcount例子

一.构建maven开发环境

为了开发storm topology, 你需要把storm相关的jar包添加到classpath里面去:

要么手动添加所有相关的jar包, 要么使用maven来管理所有的依赖。storm的jar包发布在Clojars(一个maven库),

如果你使用maven的话,把下面的配置添加在你项目的pom.xml里面。

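<repository>
  <id>clojars.org</id>
  <url>http://clojars.org/repo</url>
</repository>

<dependency>
  <groupId>storm</groupId>
  <artifactId>storm</artifactId>
  <version>0.5.3</version>
  <scope>test</scope>
</dependency>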

二.代码范例

1.Topology 入口点 RollingTopWords

------------类似于hadoop的Job定义

本地模式(嵌入Local):

package storm.starter;

import backtype.storm.Config;

import backtype.storm.LocalCluster;

import backtype.storm.testing.TestWordSpout;

import backtype.storm.topology.TopologyBuilder;

import backtype.storm.tuple.Fields;

import storm.starter.bolt.MergeObjects;

import storm.starter.bolt.RankObjects;

import storm.starter.bolt.RollingCountObjects;

public class RollingTopWords {

public static void

main(String[] args) throws Exception {

final int TOP_N = 3;

TopologyBuilder builder = new

TopologyBuilder();

builder.setSpout(1, new TestWordSpout(),

5);

builder.setBolt(2, new RollingCountObjects(60,

10), 4)

.fieldsGrouping(1, new Fields("word"));

builder.setBolt(3, new RankObjects(TOP_N),

4)

.fieldsGrouping(2, new Fields("obj"));

builder.setBolt(4, new

MergeObjects(TOP_N))

.globalGrouping(3);

Config conf = new Config();

conf.setDebug(true);

LocalCluster cluster = new LocalCluster(); //

本地模式启动集群

cluster.submitTopology("rolling-demo", conf,

builder.createTopology());

Thread.sleep(10000);

cluster.shutdown();

}

}

部署模式:

package storm.starter;

import storm.starter.bolt.MergeObjects;

import storm.starter.bolt.RankObjects;

import storm.starter.bolt.RollingCountObjects;

import backtype.storm.Config;

import backtype.storm.StormSubmitter;

import backtype.storm.testing.TestWordSpout;

import backtype.storm.topology.TopologyBuilder;

import backtype.storm.tuple.Fields;

/**
 * Cluster-mode entry point for the rolling top-words demo topology.
 *
 * <p>Same wiring as the local variant, but the topology is handed to {@code StormSubmitter}
 * under the name "demo" instead of being run on an in-process cluster.
 */
public class RollingTopWords {

    /**
     * Assembles the topology and its configuration, then submits it.
     *
     * @param args command-line arguments; not used
     * @throws Exception if submission fails or the trailing wait is interrupted
     */
    public static void main(String[] args) throws Exception {
        final int TOP_N = 3;

        TopologyBuilder wiring = wireTopology(TOP_N);
        Config settings = submissionConfig();

        StormSubmitter.submitTopology("demo", settings, wiring.createTopology());
        Thread.sleep(10000);
    }

    /** Declares the spout and the three bolts and connects them by component id. */
    private static TopologyBuilder wireTopology(int topN) {
        TopologyBuilder wiring = new TopologyBuilder();

        // Component 1: word source.
        wiring.setSpout(1, new TestWordSpout(), 5);

        // Component 2: rolling counts, partitioned on the emitted "word" field.
        Fields byWord = new Fields("word");
        wiring.setBolt(2, new RollingCountObjects(60, 10), 4).fieldsGrouping(1, byWord);

        // Component 3: partial rankings, partitioned on the counted "obj" field.
        Fields byObject = new Fields("obj");
        wiring.setBolt(3, new RankObjects(topN), 4).fieldsGrouping(2, byObject);

        // Component 4: single merger receiving all partial rankings.
        wiring.setBolt(4, new MergeObjects(topN)).globalGrouping(3);

        return wiring;
    }

    /** Creates the configuration used for submission: debug on, 20 workers, 5000 max pending. */
    private static Config submissionConfig() {
        Config settings = new Config();
        settings.setDebug(true);
        settings.setNumWorkers(20);
        settings.setMaxSpoutPending(5000);
        return settings;
    }
}

2. 直接使用内置的TestWordSpout(随机产生一个word)

TestWordSpout

package backtype.storm.testing;

import backtype.storm.topology.OutputFieldsDeclarer;

import java.util.Map;

import backtype.storm.spout.SpoutOutputCollector;

import backtype.storm.task.TopologyContext;

import backtype.storm.topology.IRichSpout;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Values;

import backtype.storm.utils.Utils;

import java.util.Random;

import org.apache.log4j.Logger;

/**
 * Spout that emits one word per {@link #nextTuple()} call, picked at random from a fixed
 * five-word vocabulary, on a single output field named "word".
 */
public class TestWordSpout implements IRichSpout {

    public static Logger LOG = Logger.getLogger(TestWordSpout.class);

    /** Vocabulary the spout draws from; shared by all instances and never modified. */
    private static final String[] WORDS =
            new String[] {"nathan", "mike", "jackson", "golda", "bertels"};

    boolean _isDistributed;

    SpoutOutputCollector _collector;

    /**
     * Random source, created on first use. It is transient so that instances restored from a
     * serialized copy each get their own generator rather than a copy of the same seed state.
     */
    private transient Random _rand;

    /** Creates a distributed spout. */
    public TestWordSpout() {
        this(true);
    }

    /**
     * @param isDistributed value later reported by {@link #isDistributed()}
     */
    public TestWordSpout(boolean isDistributed) {
        _isDistributed = isDistributed;
    }

    /** @return the flag passed to the constructor */
    public boolean isDistributed() {
        return _isDistributed;
    }

    /**
     * Stores the collector used for emitting; the other arguments are ignored.
     *
     * @param conf      topology configuration (unused)
     * @param context   topology context (unused)
     * @param collector collector that {@link #nextTuple()} emits to
     */
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        _collector = collector;
    }

    /** No resources to release. */
    public void close() {
    }

    /**
     * Calls {@code Utils.sleep(100)} and then emits one randomly selected word.
     */
    public void nextTuple() {
        Utils.sleep(100);
        if (_rand == null) {
            // Created once per instance instead of once per tuple.
            _rand = new Random();
        }
        final String word = WORDS[_rand.nextInt(WORDS.length)];
        _collector.emit(new Values(word));
    }

    /** Acknowledgements are ignored. */
    public void ack(Object msgId) {
    }

    /** Failures are ignored. */
    public void fail(Object msgId) {
    }

    /** Declares the single output field "word". */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}

3.各环节处理Bolt

RollingCountObjects:按时间桶滚动统计每个 word 的出现次数,并由后台线程定时清空过期的计数桶

package storm.starter.bolt;

import backtype.storm.task.OutputCollector;

import backtype.storm.task.TopologyContext;

import backtype.storm.topology.IRichBolt;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Tuple;

import backtype.storm.tuple.Values;

import backtype.storm.utils.Utils;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Map;

import java.util.Set;

@SuppressWarnings("serial")

public class RollingCountObjects implements IRichBolt {

private

HashMap _objectCounts

= new HashMap

long[]>();

private int

_numBuckets;

private transient Thread

cleaner;

private OutputCollector

_collector;

private int

_trackMinutes;

public

RollingCountObjects(int numBuckets, int trackMinutes) {

_numBuckets = numBuckets;

_trackMinutes = trackMinutes;

}

public long totalObjects

(Object obj) {

long[] curr = _objectCounts.get(obj);

long total = 0;

for (long l: curr) {

total+=l;

}

return total;

}

public int currentBucket

(int buckets) {

return (currentSecond()  /

secondsPerBucket(buckets)) % buckets;

}

public int

currentSecond() {

return (int) (System.currentTimeMillis() /

1000);

}

public int

secondsPerBucket(int buckets) {

return (_trackMinutes * 60 / buckets);

}

public long

millisPerBucket(int buckets) {

return (long) secondsPerBucket(buckets) *

1000;

}

@SuppressWarnings("rawtypes")

public void prepare(Map

stormConf, TopologyContext context, OutputCollector collector)

{

_collector = collector;

cleaner = new Thread(new Runnable() {

@SuppressWarnings("unchecked")

public

void run() {

Integer lastBucket =

currentBucket(_numBuckets);

while(true) {

int

currBucket = currentBucket(_numBuckets);

if(currBucket!=lastBucket) {

int bucketToWipe =

(currBucket + 1) % _numBuckets;

synchronized(_objectCounts)

{

Set objs = new

HashSet(_objectCounts.keySet());

for (Object obj: objs) {

long[] counts =

_objectCounts.get(obj);

long currBucketVal =

counts[bucketToWipe];

counts[bucketToWipe] = 0; //

*这行代码很关键*

long total =

totalObjects(obj);

if(currBucketVal!=0) {

_collector.emit(new Values(obj, total));

}

if(total==0) {

_objectCounts.remove(obj);

}

}

}

lastBucket =

currBucket;

}

long delta

= millisPerBucket(_numBuckets) - (System.currentTimeMillis() %

millisPerBucket(_numBuckets));

Utils.sleep(delta);

}

}

});

cleaner.start();

}

public void

execute(Tuple tuple) {

Object obj = tuple.getValue(0);

int bucket = currentBucket(_numBuckets);

synchronized(_objectCounts) {

long[]

curr = _objectCounts.get(obj);

if(curr==null) {

curr = new

long[_numBuckets];

_objectCounts.put(obj,

curr);

}

curr[bucket]++;

_collector.emit(new Values(obj, totalObjects(obj)));

_collector.ack(tuple);

}

}

public void cleanup()

{

}

public void

declareOutputFields(OutputFieldsDeclarer declarer) {

declarer.declare(new Fields("obj",

"count"));

}

}

RankObjects

package storm.starter.bolt;

import backtype.storm.task.TopologyContext;

import backtype.storm.topology.BasicOutputCollector;

import backtype.storm.topology.IBasicBolt;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Tuple;

import backtype.storm.tuple.Values;

import java.util.ArrayList;

import java.util.Collections;

import java.util.Comparator;

import java.util.List;

import java.util.Map;

import org.json.simple.JSONValue;

/**
 * Bolt that maintains a ranking of the entries with the highest counts and periodically emits
 * it as a JSON string on the output field "list".
 *
 * <p>Each incoming tuple's value list is treated as one ranking entry: index 0 is the tag that
 * identifies the entry, index 1 is its count (cast to {@code Long}). The ranking is kept sorted
 * by descending count and holds at most the number of entries given to the constructor.
 */
@SuppressWarnings("serial")
public class RankObjects implements IBasicBolt {

    /** Orders entries by descending count; stateless, so one shared instance is enough. */
    @SuppressWarnings("rawtypes")
    private static final Comparator<List> BY_COUNT_DESCENDING = new Comparator<List>() {
        public int compare(List o1, List o2) {
            return _compare(o1, o2);
        }
    };

    @SuppressWarnings("rawtypes")
    List<List> _rankings = new ArrayList<List>();

    int _count;

    /** Time of the last emit in epoch milliseconds, or null if nothing was emitted yet. */
    Long _lastTime = null;

    /**
     * @param n maximum number of entries kept in the ranking
     */
    public RankObjects(int n) {
        _count = n;
    }

    /**
     * Compares two entries by the count at index 1, larger count first.
     *
     * <p>The counts are compared directly rather than subtracted, so the result cannot be
     * corrupted by {@code long} overflow.
     *
     * @return 1 if {@code two} has the larger count, -1 if {@code one} has, 0 if equal
     */
    @SuppressWarnings("rawtypes")
    private static int _compare(List one, List two) {
        long valueOne = (Long) one.get(1);
        long valueTwo = (Long) two.get(1);
        if (valueTwo > valueOne) {
            return 1;
        } else if (valueTwo < valueOne) {
            return -1;
        } else {
            return 0;
        }
    } //end compare

    /**
     * @param tag tag to look for
     * @return index of the entry whose element 0 equals {@code tag}, or null if there is none
     */
    private Integer _find(Object tag) {
        for (int i = 0; i < _rankings.size(); ++i) {
            Object cur = _rankings.get(i).get(0);
            if (cur.equals(tag)) {
                return i;
            }
        }
        return null;
    }

    /** Nothing to set up. */
    @SuppressWarnings("rawtypes")
    public void prepare(Map stormConf, TopologyContext context) {
    }

    /**
     * Inserts or replaces the entry for the tuple's tag, re-sorts, trims the ranking to the
     * configured size and emits it as JSON if at least 2000 ms passed since the last emit.
     *
     * @param tuple     incoming tuple; value 0 is the tag, value 1 the count
     * @param collector collector the JSON ranking is emitted to
     */
    @SuppressWarnings("rawtypes")
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        Object tag = tuple.getValue(0);
        Integer existingIndex = _find(tag);
        if (null != existingIndex) {
            _rankings.set(existingIndex, tuple.getValues());
        } else {
            _rankings.add(tuple.getValues());
        }

        Collections.sort(_rankings, BY_COUNT_DESCENDING);

        // At most one entry is added per call, so dropping the one at index _count suffices.
        if (_rankings.size() > _count) {
            _rankings.remove(_count);
        }

        long currentTime = System.currentTimeMillis();
        if (_lastTime == null || currentTime >= _lastTime + 2000) {
            collector.emit(new Values(JSONValue.toJSONString(_rankings)));
            _lastTime = currentTime;
        }
    }

    /** Nothing to release. */
    public void cleanup() {
    }

    /** Declares the single output field "list". */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("list"));
    }
}

MergeObjects 对排序结果进行归并

package storm.starter.bolt;

import org.apache.log4j.Logger;

import backtype.storm.task.TopologyContext;

import backtype.storm.topology.BasicOutputCollector;

import backtype.storm.topology.IBasicBolt;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Tuple;

import backtype.storm.tuple.Values;

import java.util.ArrayList;

import java.util.Collections;

import java.util.Comparator;

import java.util.List;

import java.util.Map;

import org.json.simple.JSONValue;

/**
 * Bolt that merges JSON-encoded partial rankings into one overall ranking and periodically
 * emits it as a JSON string on the output field "list".
 *
 * <p>Each incoming tuple carries, as a string at index 0, a JSON list of entries. Every entry is
 * itself a list whose element 0 is the tag and whose element 1 is the count (cast to
 * {@code Long}). The merged ranking is kept sorted by descending count and holds at most the
 * number of entries given to the constructor.
 */
@SuppressWarnings("serial")
public class MergeObjects implements IBasicBolt {

    public static Logger LOG = Logger.getLogger(MergeObjects.class);

    /** Orders entries by descending count; stateless, so one shared instance is enough. */
    @SuppressWarnings("rawtypes")
    private static final Comparator<List> BY_COUNT_DESCENDING = new Comparator<List>() {
        public int compare(List o1, List o2) {
            return _compare(o1, o2);
        }
    };

    @SuppressWarnings("rawtypes")
    private List<List> _rankings = new ArrayList<List>();

    int _count = 10;

    /** Time of the last emit in epoch milliseconds, or null if nothing was emitted yet. */
    Long _lastTime;

    /**
     * @param n maximum number of entries kept in the merged ranking
     */
    public MergeObjects(int n) {
        _count = n;
    }

    /**
     * Compares two entries by the count at index 1, larger count first.
     *
     * <p>The counts are compared directly rather than subtracted, so the result cannot be
     * corrupted by {@code long} overflow.
     *
     * @return 1 if {@code two} has the larger count, -1 if {@code one} has, 0 if equal
     */
    @SuppressWarnings("rawtypes")
    private static int _compare(List one, List two) {
        long valueOne = (Long) one.get(1);
        long valueTwo = (Long) two.get(1);
        if (valueTwo > valueOne) {
            return 1;
        } else if (valueTwo < valueOne) {
            return -1;
        } else {
            return 0;
        }
    } //end compare

    /**
     * @param tag tag to look for
     * @return index of the entry whose element 0 equals {@code tag}, or null if there is none
     */
    private Integer _find(Object tag) {
        for (int i = 0; i < _rankings.size(); ++i) {
            Object cur = _rankings.get(i).get(0);
            if (cur.equals(tag)) {
                return i;
            }
        }
        return null;
    }

    /** Nothing to set up. */
    @SuppressWarnings("rawtypes")
    public void prepare(Map stormConf, TopologyContext context) {
    }

    /**
     * Merges every entry of the incoming partial ranking into the overall ranking, then emits
     * and logs the overall ranking as JSON if at least 2000 ms passed since the last emit.
     *
     * @param tuple     incoming tuple; value 0 is the JSON string of a partial ranking
     * @param collector collector the merged JSON ranking is emitted to
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        List<List> merging = (List) JSONValue.parse(tuple.getString(0));
        for (List pair : merging) {
            Integer existingIndex = _find(pair.get(0));
            if (null != existingIndex) {
                _rankings.set(existingIndex, pair);
            } else {
                _rankings.add(pair);
            }

            // Sorting and trimming stay inside the loop: an entry pushed out by one pair is
            // gone before the next pair is merged, and moving this out would change results.
            Collections.sort(_rankings, BY_COUNT_DESCENDING);

            if (_rankings.size() > _count) {
                _rankings.subList(_count, _rankings.size()).clear();
            }
        }

        long currentTime = System.currentTimeMillis();
        if (_lastTime == null || currentTime >= _lastTime + 2000) {
            String fullRankings = JSONValue.toJSONString(_rankings);
            collector.emit(new Values(fullRankings));
            LOG.info("Rankings: " + fullRankings);
            _lastTime = currentTime;
        }
    }

    /** Nothing to release. */
    public void cleanup() {
    }

    /** Declares the single output field "list". */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("list"));
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值