本文主要介绍在storm中如何组建DRPC和计算同时存在的多功能Topology,如何组建storm复杂的计算结构,以及其中应该注意的一些问题。
通过示例展示storm任务的开发。
/**
 * Static configuration holder for the log-system topology.
 *
 * <p>The values are read once, at class-initialization time, from
 * {@code logsystem.properties} on the classpath. If the file is absent, or
 * {@code worker_num} is missing or not an integer, initialization fails with
 * an {@link IllegalStateException} that names the offending file or key
 * (surfacing to callers as an {@link ExceptionInInitializerError}).
 */
public class LogSystemConfig {
    private static final String configFile = "logsystem.properties";
    private static Properties props;
    /** Value of the {@code zk_host} property, or {@code null} if not set. */
    public static String zkHost;
    /** Value of the {@code mq_url} property, or {@code null} if not set. */
    public static String mqUrl;
    /** Value of the {@code queue_name} property, or {@code null} if not set. */
    public static String queueName;
    /** Value of the {@code worker_num} property, parsed as an integer. */
    public static int workerNum;

    static {
        props = loadProperties();
        zkHost = props.getProperty("zk_host");
        mqUrl = props.getProperty("mq_url");
        queueName = props.getProperty("queue_name");
        workerNum = requireInt("worker_num");
    }

    /**
     * Loads the configuration file through this class's own class loader and
     * always closes the stream afterwards.
     *
     * @return the loaded properties (possibly partial if reading failed midway)
     * @throws IllegalStateException if the file is not on the classpath
     */
    private static Properties loadProperties() {
        Properties loaded = new Properties();
        InputStream in = LogSystemConfig.class.getClassLoader()
                .getResourceAsStream(configFile);
        if (in == null) {
            throw new IllegalStateException("Configuration file '" + configFile
                    + "' was not found on the classpath");
        }
        try {
            loaded.load(in);
        } catch (IOException e) {
            // Same best-effort reporting as before; required keys are validated below.
            e.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a classpath stream fails.
            }
        }
        return loaded;
    }

    /**
     * Reads a mandatory integer property.
     *
     * @param key property name
     * @return the parsed value
     * @throws IllegalStateException if the key is missing or not an integer
     */
    private static int requireInt(String key) {
        String raw = props.getProperty(key);
        if (raw == null) {
            throw new IllegalStateException("Missing required property '" + key
                    + "' in " + configFile);
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalStateException("Property '" + key + "' in " + configFile
                    + " is not an integer: " + raw, e);
        }
    }
}
上面是基本的配置类。
/**
 * Owner of the single, process-wide {@link JedisPool}.
 *
 * <p>The pool is built at class-initialization time from
 * {@code redis.properties} on the classpath, using the keys
 * {@code redis_host}, {@code redis_port}, {@code redis_maxactive},
 * {@code redis_maxidle} and {@code redis_maxwait}. A missing file or a
 * missing/non-numeric integer key aborts initialization with an
 * {@link IllegalStateException} naming the problem.
 */
public class RedisManager {
    private static JedisPool pool = null;
    private static final String configFile = "redis.properties";

    static {
        Properties props = loadProperties();
        String redisHost = props.getProperty("redis_host");
        int redisPort = requireInt(props, "redis_port");
        int redisMaxActive = requireInt(props, "redis_maxactive");
        int redisMaxIdle = requireInt(props, "redis_maxidle");
        int redisMaxWait = requireInt(props, "redis_maxwait");

        JedisPoolConfig config = new JedisPoolConfig();
        config.setMaxTotal(redisMaxActive);
        config.setMaxIdle(redisMaxIdle);
        config.setMaxWaitMillis(redisMaxWait);
        config.setTestOnBorrow(true);
        pool = new JedisPool(config, redisHost, redisPort);
    }

    /**
     * Loads {@code redis.properties} from the classpath and closes the stream.
     *
     * @return the loaded properties
     * @throws IllegalStateException if the file is not on the classpath
     */
    private static Properties loadProperties() {
        Properties loaded = new Properties();
        InputStream in = RedisManager.class.getClassLoader()
                .getResourceAsStream(configFile);
        if (in == null) {
            throw new IllegalStateException("Configuration file '" + configFile
                    + "' was not found on the classpath");
        }
        try {
            loaded.load(in);
        } catch (IOException e) {
            // Same best-effort reporting as before; required keys are validated by the caller.
            e.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a classpath stream fails.
            }
        }
        return loaded;
    }

    /**
     * Reads a mandatory integer property.
     *
     * @param props loaded configuration
     * @param key   property name
     * @return the parsed value
     * @throws IllegalStateException if the key is missing or not an integer
     */
    private static int requireInt(Properties props, String key) {
        String raw = props.getProperty(key);
        if (raw == null) {
            throw new IllegalStateException("Missing required property '" + key
                    + "' in " + configFile);
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalStateException("Property '" + key + "' in " + configFile
                    + " is not an integer: " + raw, e);
        }
    }

    /**
     * Borrows a connection from the pool. The caller must hand it back with
     * {@link #returnResource(Jedis)} or {@link #returnBrokenResource(Jedis)}.
     *
     * @return a pooled Jedis connection
     */
    public static Jedis getJedis() {
        return pool.getResource();
    }

    /**
     * Returns a healthy connection to the pool; {@code null} is ignored.
     *
     * @param redis connection previously obtained from {@link #getJedis()}
     */
    public static void returnResource(Jedis redis) {
        if (redis != null) {
            pool.returnResource(redis);
        }
    }

    /**
     * Returns a connection that failed during use; {@code null} is ignored.
     *
     * @param redis connection previously obtained from {@link #getJedis()}
     */
    public static void returnBrokenResource(Jedis redis) {
        if (redis != null) {
            pool.returnBrokenResource(redis);
        }
    }

    /** Destroys the shared pool, if it was created. */
    public static void poolDestroy() {
        if (pool != null) {
            pool.destroy();
        }
    }
}
上面是管理Redis的代码片段
/**
 * Holds the single pooled ActiveMQ connection used by this process.
 *
 * <p>The connection is opened and started at class-initialization time against
 * a failover URL built around {@code LogSystemConfig.mqUrl}. If opening fails
 * with a {@code JMSException} the stack trace is printed and
 * {@link #getConn()} may return a connection that was never started, or
 * {@code null}.
 */
public class ActiveMqManager {
    private static PooledConnection conn;

    static {
        final String urlHead = "failover:(";
        final String urlTail =
                ")?initialReconnectDelay=10000&timeout=10000&startupMaxReconnectAttempts=5";
        String brokerUrl = urlHead + LogSystemConfig.mqUrl + urlTail;
        ActiveMQConnectionFactory rawFactory = new ActiveMQConnectionFactory(brokerUrl);
        try {
            PooledConnectionFactory pooled = new PooledConnectionFactory(rawFactory);
            pooled.setMaxConnections(5);
            conn = (PooledConnection) pooled.createConnection();
            conn.start();
        } catch (JMSException failure) {
            failure.printStackTrace();
        }
    }

    /** Closes the shared connection if one was created; close errors are printed. */
    public static void destroy() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (JMSException failure) {
            failure.printStackTrace();
        }
    }

    /**
     * @return the shared pooled connection, or {@code null} if it could not be created
     */
    public static PooledConnection getConn() {
        return conn;
    }
}
上面是做ActiveMq管理的代码
/**
 * Bolt that parses a raw log line, records its {@code sys} value in the Redis
 * set {@link #search_key} (a set, so duplicates collapse), and emits the parsed
 * record wrapped in a JSON object keyed by {@link #search_key}.
 *
 * <p>The JSON payload is everything following the {@link #split_key} marker in
 * the incoming line. Lines without that marker, or whose payload parses to
 * nothing, are logged and skipped instead of being parsed from an arbitrary
 * offset.
 */
public class SystemClassifyBolt extends BaseBasicBolt{
    public static final String search_key = "log_system";
    public static final String split_key = "Data=";
    public static final Log LOG = LogFactory.getLog(SystemClassifyBolt.class);
    private static final long serialVersionUID = 1L;

    /**
     * Parses the log line in field 0, stores its system name in Redis and emits
     * the record as a JSON string.
     *
     * <p>The tuple is emitted even when the Redis update fails; the failure is
     * only logged.
     *
     * @param input     tuple whose first field is the raw log line
     * @param collector collector used to emit the JSON string
     */
    public void execute(Tuple input, BasicOutputCollector collector) {
        String log = input.getString(0);
        int marker = (log == null) ? -1 : log.indexOf(split_key);
        if (marker < 0) {
            LOG.warn("Skip log line without '" + split_key + "' marker: " + log);
            return;
        }
        // Use the marker's real length rather than a hard-coded 5.
        String data = log.substring(marker + split_key.length());
        LogString logs = JSON.toJavaObject(JSON.parseObject(data), LogString.class);
        if (logs == null) {
            LOG.warn("Skip log line with empty payload: " + log);
            return;
        }

        recordSystem(logs.getSys());

        JSONObject json = new JSONObject();
        List<LogString> all = new ArrayList<LogString>();
        all.add(logs);
        json.put(search_key, all);
        collector.emit(new Values(json.toJSONString()));
    }

    /**
     * Adds {@code sys} to the Redis set inside a MULTI/EXEC transaction.
     * Failures are logged, never propagated. A connection that threw is handed
     * back as broken so a half-finished transaction is not reused from the pool.
     *
     * @param sys system name taken from the parsed log record
     */
    private void recordSystem(String sys) {
        Jedis client = RedisManager.getJedis();
        boolean broken = false;
        try {
            Transaction tx = client.multi();
            tx.sadd(search_key, sys);
            List<Object> result = tx.exec();
            if (result == null || result.isEmpty()) {
                LOG.info("Transaction error...");
            }
        } catch (Exception e) {
            broken = true;
            LOG.error("Execute Error.", e);
        } finally {
            if (broken) {
                RedisManager.returnBrokenResource(client);
            } else {
                RedisManager.returnResource(client);
            }
        }
    }

    /** Declares the single output field, {@code log_system}. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("log_system"));
    }

    /**
     * Destroys the Redis pool when the bolt is shut down.
     *
     * <p>NOTE(review): the pool is a static member of RedisManager, so this
     * affects every other user of RedisManager in the same JVM — confirm that
     * is intended.
     */
    public void cleanup() {
        try{
            RedisManager.poolDestroy();
        } catch (Exception e){
            LOG.error("Destroy Redis Pool Error.", e);
        }
    }
}
上面这个bolt是处理日志数据,拿到sys字段加入到redis,做去重。
/**
 * Terminal bolt that forwards the first value of every tuple, as a text
 * message, to the ActiveMQ queue named by {@code LogSystemConfig.queueName}.
 *
 * <p>Each message is sent and committed on a transacted session. A tuple is
 * acked after a successful commit and failed otherwise; after a failure the
 * session is rolled back and rebuilt.
 */
public class ActiveMqBolt extends BaseRichBolt {
    public static final Log LOG = LogFactory.getLog(ActiveMqBolt.class);
    /**
     * serialVersionUID
     */
    private static final long serialVersionUID = 1L;
    private OutputCollector collector;
    Session session;
    Queue mqQueue;
    MessageProducer producer;

    /**
     * Stores the collector and opens the JMS session, queue and producer.
     * A JMS failure is logged; {@link #execute(Tuple)} retries the setup later.
     */
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
        try {
            openSession();
        } catch (JMSException e1) {
            LOG.error("Create ActiveMq Error.", e1);
        }
    }

    /** This bolt emits nothing, so no output fields are declared. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    /**
     * Creates the transacted session, the queue and the producer on the shared
     * connection.
     *
     * @throws JMSException if the shared connection is unavailable or any JMS
     *                      object cannot be created
     */
    private void openSession() throws JMSException {
        if (ActiveMqManager.getConn() == null) {
            throw new JMSException("ActiveMq connection is not available.");
        }
        session = ActiveMqManager.getConn().createSession(true, Session.AUTO_ACKNOWLEDGE);
        mqQueue = session.createQueue(LogSystemConfig.queueName);
        producer = session.createProducer(mqQueue);
    }

    /** Closes producer and session independently so one failure cannot block the other. */
    private void closeSession() {
        if (producer != null) {
            try {
                producer.close();
            } catch (JMSException e) {
                LOG.error("Reconnect Error.", e);
            }
            producer = null;
        }
        if (session != null) {
            try {
                session.close();
            } catch (JMSException e) {
                LOG.error("Reconnect Error.", e);
            }
            session = null;
        }
    }

    /** Drops the current session and producer and tries to create fresh ones. */
    private void reconnect(){
        closeSession();
        try{
            openSession();
        } catch (JMSException e){
            LOG.error("Reconnect Error.",e);
        }
    }

    /**
     * Sends the tuple's first value to the queue and commits.
     *
     * @param input tuple whose first value is converted with {@code toString()}
     */
    public void execute(Tuple input) {
        String log = input.getValues().get(0).toString();
        if (session == null || producer == null) {
            // An earlier prepare/reconnect failed; try once more before giving up on this tuple.
            reconnect();
            if (session == null || producer == null) {
                LOG.error("Produce Message Error.");
                this.collector.fail(input);
                return;
            }
        }
        try {
            Message message = session.createTextMessage(log);
            producer.send(message);
            session.commit();
            this.collector.ack(input);
        } catch (JMSException e) {
            LOG.error("Produce Message Error.", e);
            this.collector.fail(input);
            try {
                session.rollback();
            } catch (JMSException e1) {
                LOG.error("Roll Bace Error.", e1);
            }
            reconnect();
        }
    }

    /**
     * Releases this bolt's session and producer, then closes the shared connection.
     *
     * <p>NOTE(review): the connection is a static member of ActiveMqManager, so
     * closing it affects every user in the same JVM — confirm that is intended.
     */
    public void cleanup() {
        closeSession();
        ActiveMqManager.destroy();
    }
}
上面是将数据增加了字段后转发到MQ的bolt
/**
 * Bolt that answers a request with the full contents of the Redis set
 * {@link #search_key}, serialized as a JSON string.
 */
public class SystemAchieveBolt extends BaseBasicBolt{
    public static final String search_key = "log_system";
    private static final long serialVersionUID = 1L;
    public static final Log LOG = LogFactory.getLog(SystemAchieveBolt.class);

    /**
     * Reads every member of the Redis set and emits it together with the
     * incoming tuple's second field.
     *
     * <p>The borrowed connection is always handed back to the pool; previously
     * it was never returned, so each call leaked one pooled connection. Errors
     * are logged and nothing is emitted in that case.
     *
     * @param input     request tuple; field 1 is forwarded unchanged
     *                  (presumably the DRPC return-info — verify against the spout)
     * @param collector collector used to emit the result
     */
    public void execute(Tuple input, BasicOutputCollector collector) {
        Jedis client = RedisManager.getJedis();
        boolean redisDone = false;
        try{
            Set<String> result = client.smembers(search_key);
            redisDone = true;
            collector.emit(new Values(JSONObject.toJSONString(result), input.getString(1)));
        } catch (Exception e){
            LOG.error("Jedis Get Sys Error.", e);
        } finally {
            if (redisDone) {
                RedisManager.returnResource(client);
            } else {
                // The Redis call itself failed, so do not let the pool reuse this connection.
                RedisManager.returnBrokenResource(client);
            }
        }
    }

    /** Declares the two output fields, {@code sys} and {@code result}, in that order. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sys", "result"));
    }
}
上面是获取redis的sys字段结果的bolt
<pre name="code" class="java">public class LogSystemTopology {
public static void main(String[] args) {
try{
BrokerHosts brokerHosts = new ZkHosts(LogSystemConfig.zkHost);
SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, "log_system", "",
"kafkaspout");
Config config = new Config();
if(args != null && args.length > 0){
config.setNumWorkers(LogSystemConfig.workerNum);
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
DRPCSpout spout = new DRPCSpout("log_system");//使用DRPC SPOUT
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("log_system", spout, 10);
builder.setSpout("kafkaspout", new KafkaSpout(spoutConfig), 5);//使用Kafka Spout,这个topolopy中定义了两个不同的
builder.setBolt("getbolt", new SystemAchieveBolt(), 10).shuffleGrouping("log_system");//从redis中获取结果的bolt
builder.setBolt("return", new ReturnResults(), 10).shuffleGrouping("getbolt");//返回数据的bolt
builder.setBolt("logbolt", new SystemClassifyBolt(), 10).shuffleGrouping("kafkaspout");
builder.setBolt("activemqbolt", new ActiveMqBolt(), 10).shuffleGrouping("logbolt");
StormSubmitter.submitTopology("log_system", config, builder.createTopology());
} else {
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
LocalDRPC drpc = new LocalDRPC();
DRPCSpout spout = new DRPCSpout("log_system",drpc);
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("log_system", spout, 10);
builder.setSpout("kafkaspout", new KafkaSpout(spoutConfig), 5);
builder.setBolt("getbolt", new SystemAchieveBolt(), 10).shuffleGrouping("log_system");
builder.setBolt("return", new ReturnResults(), 10).shuffleGrouping("getbolt");
builder.setBolt("logbolt", new SystemClassifyBolt(), 10).shuffleGrouping("kafkaspout");
builder.setBolt("activemqbolt", new ActiveMqBolt(), 10).shuffleGrouping("logbolt");
config.setMaxTaskParallelism(1);
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("log_system", config,
builder.createTopology());
}
} catch (Exception e){
e.printStackTrace();
}
}
}
LogSystemTopology中定义了两条计算线路,其中一个提供drpc服务,一个处理Log数据。
以上是一个比较简单的storm任务,根据业务可以增加不同的spout、bolt来实现复杂的计算过程。