package storm.starter.chenbo;
import backtype.storm.Config;
import backtype.storm.Constants;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.task.ShellBolt;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.spout.MultiScheme;
import backtype.storm.spout.SchemeAsMultiScheme;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.StringScheme;
import storm.kafka.SpoutConfig;
import storm.kafka.ZkHosts;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.UUID;
import java.text.SimpleDateFormat;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.Pipeline;
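/**
 * Storm topology that reads web-log lines from Kafka, counts page views per
 * minute and URL in a Storm stream, and stores the resulting counts in Redis.
 * This topology demonstrates Storm's stream groupings (shuffle and fields
 * grouping). The earlier note about multilang capabilities does not match the
 * code in this file, which defines no multilang bolt.
 */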
public class PVTopology {
/**
 * Splits a raw access-log line into its request URL and emits it together
 * with the current processing minute.
 *
 * Expected input (field 0 of the tuple), space separated, e.g.:
 * 205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] "GET /shuttle/countdown/countdown.html HTTP/1.0" 200 3985
 */
public static class ParseBolt extends BaseBasicBolt {

    /**
     * Parses one log line and emits a ("time", "url") pair.
     *
     * @param tuple     input tuple whose first field is the raw log line
     * @param collector collector used to emit the parsed result
     */
    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        final String logLine = tuple.getString(0);
        final String[] parts = logLine.split(" ");

        // Index 6 (the request path) must exist, otherwise the line is unusable.
        if (parts.length <= 6) {
            System.err.println("can not parse for log " + logLine);
            return;
        }

        final String loggedTime = parts[3];
        final String rawUrl = parts[6];

        // Strip the query string; a '?' at position 0 is deliberately left alone.
        final int queryStart = rawUrl.indexOf("?");
        final String path = queryStart > 0 ? rawUrl.substring(0, queryStart) : rawUrl;

        // The timestamp from the log is only printed; the emitted "time" is the
        // wall-clock minute at which the line is processed.
        System.out.println(loggedTime + "\t" + path);
        final long processingMinute = System.currentTimeMillis() / (60 * 1000);
        collector.emit(new Values(processingMinute, path));
    }

    /** Declares the two output fields: "time" (minute number) and "url". */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("time", "url"));
    }
}
public static class CountBolt extends BaseBasicBolt {
TreeMap<Long, Map<String, Integer>> timeCounts = new TreeMap<Long, Map<String, Integer>>();
private Jedis jedis;
private String prefix;
private void output(long minute, String url, Integer count) {
for (int i = 0; i < 5; i++) {
try {
jedis.hset(prefix, new Date(minute * 60 * 1000) + "_" + url, count.toString());
break;
} catch (Exception e) {
e.printStackTrace();
}
}
}
@Override
public void prepare(Map stormConf, TopologyContext context) {
String host = "node2";
Integer port = 6379;
if (stormConf.get("redis.host") != null) {
host = (String) stormConf.get("redis.host");
}
if (stormConf.get("redis.port") != null) {
port = (Integer) stormConf.get("redis.port");
}
System.out.println("connecting to redis " + host + ":" + port);
this.jedis = new Jedis(host, port);
System.out.println("connected to redis " + host + ":" + port);
this.prefix = (String) stormConf.get("topology.name");
}
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
long currentMinute = System.currentTimeMillis() / (60 * 1000);
// handle timer tick
if (tuple.getSourceStreamId().equals(Constants.SYSTEM_TICK_STREAM_ID)) {
// save counts
Iterator<Map.Entry<Long, Map<String, Integer>>> iter = timeCounts
.entrySet().iterator();
while (iter.hasNext()) {
Map.Entry<Long, Map<String, Integer>> entry = iter.next();
long minute = entry.getKey();
if (currentMinute > minute + 1) {
for (Map.Entry<String, Integer> counts : entry.getValue().entrySet()) {
String url = counts.getKey();
Integer count = counts.getValue();
output(minute, url, count);
}
iter.remove();
} else {
break;
}
}
return;
}
long minute = tuple.getLong(0);
String url = tuple.getString(1);
//如果进来的数据是上二分钟,就丢弃
if (currentMinute > minute + 1) {
System.out.println("drop outdated tuple " + tuple);
return;
}
Map<String, Integer> counts = timeCounts.get(minute);
if (counts == null) {
counts = new HashMap<String, Integer>();
timeCounts.put(minute, counts);
}
Integer count = counts.get(url);
if (count == null)
count = 0;
count++;
counts.put(url, count);
collector.emit(new Values(minute, url, count));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("time", "url", "count"));
}
@Override
public Map<String, Object> getComponentConfiguration() {
Config conf = new Config();
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 60);
return conf;
}
}
/**
 * Builds the topology (Kafka spout -> parse bolt -> count bolt) and runs it
 * on an in-process LocalCluster under the name "nasa_weblog_pv".
 *
 * @param args command-line arguments (currently unused)
 * @throws Exception if building or submitting the topology fails
 */
public static void main(String[] args) throws Exception {
    final String topic = "nasa_weblog";
    final ZkHosts zkHosts = new ZkHosts("192.168.188.4:4180,192.168.188.5:4180,192.168.188.6:4180");

    // Kafka spout configuration: "/MyKafka" is the root path for stored
    // offsets, "MyTrack" identifies this consuming application.
    final SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/MyKafka", "MyTrack");

    // Derive the bare ZooKeeper host names (without ":port") from the broker string.
    System.out.println(zkHosts.brokerZkStr);
    final String[] hostPorts = zkHosts.brokerZkStr.split(",");
    final List<String> zkServers = new ArrayList<String>();
    for (int i = 0; i < hostPorts.length; i++) {
        zkServers.add(hostPorts[i].split(":")[0]);
    }

    spoutConfig.zkServers = zkServers;
    spoutConfig.zkPort = 4180;
    spoutConfig.forceFromStart = true; // consume the topic from the beginning
    spoutConfig.socketTimeoutMs = 60 * 1000;
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); // emit messages as String

    // Wire the topology: lines are shuffled to the parsers, parsed tuples are
    // grouped by ("time", "url") before counting.
    final TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("kafka_spout", new KafkaSpout(spoutConfig), 1);
    builder.setBolt("parse", new ParseBolt(), 2).shuffleGrouping("kafka_spout");
    builder.setBolt("count", new CountBolt(), 4).fieldsGrouping("parse", new Fields("time", "url"));

    final Config conf = new Config();
    conf.setNumWorkers(3);

    // Runs locally; submitting to a real cluster would go through
    // StormSubmitter instead of LocalCluster.
    final String topologyName = topic + "_pv";
    final LocalCluster localCluster = new LocalCluster();
    localCluster.submitTopology(topologyName, conf, builder.createTopology());
}
}