统计APP中,全量用户的搜索统计,并且列出TopN
主要流程如下:
1.APP端的搜索数据推送到Kafka中
2.Storm 与 Kafka 有 native 的接口 OpaqueTridentKafkaSpout
3.Storm中应用到SlidingDurationWindow即窗口统计
4.数据通过TridentHBaseMapper写入到HBase当中
code:
// SLF4J logger shared by the nested Trident components; final because it is never reassigned.
static final Logger logger = LoggerFactory.getLogger(TopNTopology.class);
/**
 * Trident function that takes the word->count map produced by the window
 * aggregator (field "wordCount"), sorts it by count, and emits the top-N
 * entries as (rank, key, value) string tuples for the HBase sink.
 */
private static class TopNFuncation extends BaseFunction {
    // Maximum number of ranked entries to emit per window.
    private final int topN;

    public TopNFuncation(int TopN) {
        this.topN = TopN;
    }

    @SuppressWarnings("unchecked")
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        // BUG FIX: the original allocated an empty HashMap/ArrayList and never
        // read the incoming tuple, so it always sorted an empty list and
        // emitted nothing. Read the aggregated map from the tuple instead.
        Map<String, Long> counts = (Map<String, Long>) tridentTuple.getValue(0);
        if (counts == null || counts.isEmpty()) {
            return;
        }
        List<Map.Entry<String, Long>> entryList =
                new ArrayList<Map.Entry<String, Long>>(counts.entrySet());
        // Sort descending by count so the hottest words come first.
        Collections.sort(entryList, new Comparator<Map.Entry<String, Long>>() {
            public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        // BUG FIX: the original never incremented its loop counter (the TopN
        // guard never fired), skipped index 0 (j > 0), and emitted the raw
        // list index as the rank. Emit a proper 1..topN rank instead.
        int limit = Math.min(topN, entryList.size());
        for (int rank = 1; rank <= limit; rank++) {
            Map.Entry<String, Long> entry = entryList.get(rank - 1);
            logger.info("the top is " + rank + " the key is " + entry.getKey()
                    + " the values is " + entry.getValue());
            tridentCollector.emit(new Values(String.valueOf(rank), entry.getKey(),
                    String.valueOf(entry.getValue())));
        }
    }
}
/**
 * Trident flat-map function: splits each incoming sentence on single spaces
 * and emits one single-field tuple per word.
 */
private static class SplitFunction implements FlatMapFunction {
    public Iterable<Values> execute(TridentTuple tridentTuple) {
        String sentence = tridentTuple.getString(0);
        List<Values> words = new ArrayList<Values>();
        for (String token : sentence.split(" ")) {
            words.add(new Values(token));
        }
        return words;
    }
}
/**
 * Windowed aggregator that counts occurrences of the "word" field within the
 * current window and emits the resulting word->count map when the window closes.
 */
private static class WordAggreator extends BaseAggregator<HashMap<String, Long>> {
    // Fresh accumulator for each window/batch.
    public HashMap<String, Long> init(Object o, TridentCollector tridentCollector) {
        return new HashMap<String, Long>();
    }

    public void aggregate(HashMap<String, Long> stringLongHashMap, TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String word = tridentTuple.getStringByField("word");
        // BUG FIX: the original started count at 0 and never added 1 for the
        // current occurrence, so every word's count stayed at 0 forever.
        long count = 1;
        if (stringLongHashMap.containsKey(word)) {
            count += stringLongHashMap.get(word);
        }
        stringLongHashMap.put(word, count);
    }

    // Emit the full map as a single tuple; TopNFuncation consumes it downstream.
    public void complete(HashMap<String, Long> stringLongHashMap, TridentCollector tridentCollector) {
        tridentCollector.emit(new Values(stringLongHashMap));
    }
}
/**
 * Wires the topology: Kafka spout -> word split -> 10s sliding-window word
 * count -> TopN ranking -> HBase sink, then runs it locally (no topology-name
 * argument) or submits it to a cluster.
 *
 * Usage: [topN] [topologyName] — with fewer than two args the topology runs
 * in a LocalCluster.
 */
public static void main(String[] args) {
    TridentTopology tridentTopology = new TridentTopology();
    String zks = "10.1.69.11:2181,10.1.69.12:2181,10.1.69.13:2181";
    String topic = "stormKafka";
    String zkRoot = "/storm";
    String id = "stormKafka";
    BrokerHosts brokerHosts = new ZkHosts(zks);
    // Opaque (exactly-once) Trident spout reading raw strings from Kafka.
    TridentKafkaConfig spoutConf = new TridentKafkaConfig(brokerHosts, topic);
    spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(spoutConf);
    // HBase mapping: row key = rank, column family "result", columns word/count.
    TridentHBaseMapper tridentHBaseMapper = new SimpleTridentHBaseMapper()
            .withColumnFamily("result")
            .withColumnFields(new Fields("word", "count"))
            .withRowKeyField("rank");
    HBaseState.Options options = new HBaseState.Options()
            .withConfigKey("hbase")
            .withDurability(Durability.SYNC_WAL)
            .withMapper(tridentHBaseMapper)
            .withTableName("TopHot");
    StateFactory hBaseStateFactory = new HBaseStateFactory(options);
    // 10s window length with 10s slide, i.e. non-overlapping 10-second windows.
    WindowConfig durationWindow = SlidingDurationWindow.of(BaseWindowedBolt.Duration.seconds(10), BaseWindowedBolt.Duration.seconds(10));
    // BUG FIX: the original read args[0] unconditionally here and only later
    // checked args.length == 0 for local mode — that branch was unreachable
    // because the missing argument already threw ArrayIndexOutOfBoundsException.
    // Default to top 10 when no argument is supplied.
    int topN = args.length > 0 ? Integer.parseInt(args[0]) : 10;
    tridentTopology.newStream("spout", spout)
            .flatMap(new SplitFunction(), new Fields("word"))
            .window(durationWindow, new Fields("word"), new WordAggreator(), new Fields("wordCount"))
            .each(new Fields("wordCount"), new TopNFuncation(topN), new Fields("rank", "key", "value"))
            .partitionPersist(hBaseStateFactory, new Fields("rank", "key", "value"), new HBaseUpdater(), new Fields());
    Config conf = new Config();
    Map<String, Object> hbConf = new HashMap<String, Object>();
    conf.put("hbase", hbConf);
    // BUG FIX: with exactly one argument the original fell into the cluster
    // branch and crashed on args[1]; require both args for cluster submission.
    if (args.length < 2) {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("TopNTopology", conf, tridentTopology.build());
    } else {
        conf.setNumWorkers(3);
        try {
            StormSubmitter.submitTopologyWithProgressBar(args[1], conf, tridentTopology.build());
        } catch (AlreadyAliveException e) {
            logger.error("topology already alive: " + args[1], e);
        } catch (InvalidTopologyException e) {
            logger.error("invalid topology: " + args[1], e);
        } catch (AuthorizationException e) {
            logger.error("not authorized to submit: " + args[1], e);
        }
    }
}