If Storm produces a large volume of data and we persist it all to a traditional database, we end up doing frequent disk I/O, which is clearly undesirable. Persistence is therefore usually paired with Redis: Storm computes in memory and Redis is an in-memory data store, so the two fit together well. The official documentation also does not recommend persisting all of Storm's data into a database, because Storm may lose a small amount of data here and there (the same is true of Hadoop); Storm is meant for processing data in large volumes, and if the data volume is small there is no need to use Storm at all.
Redis offers two persistence mechanisms, RDB and AOF, so the example below integrates Storm with Redis and persists the data to Redis.
We need a Spout class (SampleSpout.java), which repeatedly generates records and emits them to the Bolt (StormRedisBolt.java); a topology class, Topology.java; and a Redis helper class, RedisOperations.java, which initializes the Redis connection and provides the related operations.
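The code below uses the pre-1.0 Storm API (the backtype.storm packages), the Jedis client, and Jackson for JSON serialization, so the project needs the corresponding storm-core, jedis and jackson-databind libraries on its classpath; the exact versions depend on your environment.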
First, Redis needs AOF or RDB persistence configured, and it is then started with that configuration file. In this example only RDB mode is configured:
/usr/local/redis/bin/redis-server /usr/local/redis/etc/redis.conf
Go into the RDB storage directory; at this point there is no dump.rdb file yet.
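For reference, the RDB-related settings in redis.conf look roughly like the following; the save thresholds are the stock defaults and the data directory is only illustrative, so adjust them to your own setup:

# write an RDB snapshot if at least N keys changed within the given number of seconds
save 900 1
save 300 10
save 60 10000
# file name and directory of the RDB snapshot
dbfilename dump.rdb
dir /usr/local/redis/data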
SampleSpout.java
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class SampleSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;

    private SpoutOutputCollector spoutOutputCollector;

    private static final Map<Integer, String> FIRSTNAMEMAP = new HashMap<Integer, String>();
    static {
        FIRSTNAMEMAP.put(0, "john");
        FIRSTNAMEMAP.put(1, "nick");
        FIRSTNAMEMAP.put(2, "mick");
        FIRSTNAMEMAP.put(3, "tom");
        FIRSTNAMEMAP.put(4, "jerry");
    }

    private static final Map<Integer, String> LASTNAME = new HashMap<Integer, String>();
    static {
        LASTNAME.put(0, "anderson");
        LASTNAME.put(1, "watson");
        LASTNAME.put(2, "ponting");
        LASTNAME.put(3, "dravid");
        LASTNAME.put(4, "lara");
    }

    private static final Map<Integer, String> COMPANYNAME = new HashMap<Integer, String>();
    static {
        COMPANYNAME.put(0, "abc");
        COMPANYNAME.put(1, "dfg");
        COMPANYNAME.put(2, "pqr");
        COMPANYNAME.put(3, "ecd");
        COMPANYNAME.put(4, "awe");
    }

    public void open(Map conf, TopologyContext context,
            SpoutOutputCollector spoutOutputCollector) {
        // Called once when the spout is initialized; keep a reference to the collector.
        this.spoutOutputCollector = spoutOutputCollector;
    }

    public void nextTuple() {
        // The Storm cluster repeatedly calls this method to emit a continuous
        // stream of tuples.
        final Random rand = new Random();
        // Generate a random number from 0 to 4.
        int randomNumber = rand.nextInt(5);
        spoutOutputCollector.emit(new Values(FIRSTNAMEMAP.get(randomNumber),
                LASTNAME.get(randomNumber), COMPANYNAME.get(randomNumber)));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare the fields emitted by this spout.
        declarer.declare(new Fields("firstName", "lastName", "companyName"));
    }
}
StormRedisBolt.java
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

public class StormRedisBolt implements IBasicBolt {

    private static final long serialVersionUID = 2L;

    private RedisOperations redisOperations = null;
    private String redisIP = null;
    private int port;

    public StormRedisBolt(String redisIP, int port) {
        this.redisIP = redisIP;
        this.port = port;
    }

    public void execute(Tuple input, BasicOutputCollector collector) {
        Map<String, Object> record = new HashMap<String, Object>();
        // Read the fields declared by the spout: "firstName", "lastName", "companyName".
        record.put("firstName", input.getValueByField("firstName"));
        record.put("lastName", input.getValueByField("lastName"));
        record.put("companyName", input.getValueByField("companyName"));
        // Persist each record to Redis under a random UUID key.
        redisOperations.insert(record, UUID.randomUUID().toString());
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // This bolt does not emit any tuples, so there are no output fields to declare.
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    public void prepare(Map stormConf, TopologyContext context) {
        // Create the Redis connection when the bolt is initialized on a worker.
        redisOperations = new RedisOperations(this.redisIP, this.port);
    }

    public void cleanup() {
    }
}
RedisOperations.java (the Redis helper class)
import java.io.Serializable;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

import redis.clients.jedis.Jedis;

public class RedisOperations implements Serializable {

    private static final long serialVersionUID = 1L;

    Jedis jedis = null;

    public RedisOperations(String redisIP, int port) {
        // Connect to the Redis instance.
        jedis = new Jedis(redisIP, port);
    }

    public void insert(Map<String, Object> record, String id) {
        try {
            // Serialize the record to JSON and store it under the given key.
            jedis.set(id, new ObjectMapper().writeValueAsString(record));
        } catch (Exception e) {
            System.out.println("Record not persisted into datastore: " + e.getMessage());
        }
    }
}
Topology.java
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;

public class Topology {

    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        TopologyBuilder builder = new TopologyBuilder();
        // Set the spout class.
        builder.setSpout("spout", new SampleSpout(), 2);
        // Set the bolt class and subscribe it to the spout with shuffle grouping.
        builder.setBolt("bolt", new StormRedisBolt("192.168.1.114", 6379), 2).shuffleGrouping("spout");

        Config conf = new Config();
        conf.setDebug(true);
        // Create an instance of LocalCluster for executing the topology in local mode.
        LocalCluster cluster = new LocalCluster();
        // StormRedisTopology is the name of the submitted topology.
        cluster.submitTopology("StormRedisTopology", conf, builder.createTopology());
        try {
            // Let the topology run for 10 seconds.
            Thread.sleep(10000);
        } catch (Exception exception) {
            System.out.println("Thread interrupted exception : " + exception);
        }
        // Kill the StormRedisTopology.
        cluster.killTopology("StormRedisTopology");
        // Shut down the local test cluster.
        cluster.shutdown();
    }
}
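In local mode, Topology can be run directly as an ordinary Java program: the LocalCluster runs the topology (the spout emits random records and the bolt writes them to Redis) for about ten seconds, then kills the topology and shuts the cluster down.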
Execution result
After the topology has run, a dump.rdb file appears in the RDB storage directory, which shows that the data was persisted successfully.
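Besides checking for the dump.rdb file, you can also read some of the persisted records back through Jedis. This is a minimal verification sketch (the class name is just an example), assuming Redis is reachable at the same address and port used in the topology and that the instance holds no unrelated keys:

import java.util.Set;

import redis.clients.jedis.Jedis;

public class VerifyRedisData {

    public static void main(String[] args) {
        // Connect to the same Redis instance the topology wrote to (address taken from Topology.java).
        Jedis jedis = new Jedis("192.168.1.114", 6379);
        // The bolt stores each record under a random UUID key, so list all keys.
        Set<String> keys = jedis.keys("*");
        System.out.println("Number of persisted records: " + keys.size());
        // Print a few of the JSON records written by StormRedisBolt.
        int count = 0;
        for (String key : keys) {
            System.out.println(key + " -> " + jedis.get(key));
            if (++count >= 5) {
                break;
            }
        }
        jedis.close();
    }
}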