一.前言
Storm安装配置没有使用Hadoop,比较简单,网络相关介绍比较多,简列如下。
二.安装配置
2.1 tar -zxvf apache-storm-1.0.5.tar.gz
2.2 vi /etc/profile
export STORM_HOME=/mnt/data/software/storm
export PATH=$PATH:$STORM_HOME/bin
2.3 vi storm.yaml
storm.zookeeper.servers:
- "ipsnode1"
- "ipsnode2"
- "ipsnode3"
#nimbus
nimbus.seeds: ["ipsnode1", "ipsnode2"] #HA
ui.port: 8081
supervisor.slots.ports:
- 6700
- 6701
- 6702
- 6703
storm.local.dir: "/storm/data"
2.4 cp to others nodes
2.5 start
nohup storm nimbus & #primary&sePrimary
nohup storm supervisor & #others
nohup storm ui & #primary
nohup storm logviewer & #primary
三.程序
本文描述的实验,使用kafka发送数据,每条消息数据格式(分割符可以是空格或逗号): word,word,word…
然后使用Storm的滑动窗口计算每一分钟(相当于滚动窗口)的每个word出现的次数和所有时间内word出现的次数,将统计结果写入文件。
KafkaBolt.java
package org.ips.demo;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseWindowedBolt;
import org.apache.storm.windowing.TupleWindow;
import org.apache.storm.tuple.Tuple;
import java.io.FileWriter;
import java.util.UUID;
import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
/**
 * Terminal windowed bolt: counts words arriving from the Kafka spout and
 * appends two snapshot lines per tuple to a uniquely named file under
 * /storm/data/ — cumulative counts ("--Total") and current-window counts
 * ("--window").
 */
public class KafkaBolt extends BaseWindowedBolt {
    private FileWriter fileWriter = null;
    private OutputCollector collector = null;
    // Cumulative word counts across the lifetime of this bolt instance.
    private Map<String, Integer> counters;
    // Word counts for the current window only; cleared at each window start.
    private Map<String, Integer> countern;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        // Runs once per worker (unless the task fails and is restarted).
        this.collector = outputCollector;
        this.counters = new HashMap<String, Integer>();
        this.countern = new HashMap<String, Integer>();
        try {
            // Random UUID filename so concurrent executors never collide.
            fileWriter = new FileWriter("/storm/data/" + UUID.randomUUID());
            System.out.println("------******prepare ok");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void execute(TupleWindow inputWindow) {
        // Invoked once per window (60s tumbling window configured by the topology).
        System.out.println("------******execute");
        countern.clear();
        for (Tuple tuple : inputWindow.get()) {
            // Field 4 is the "value" field of the storm-kafka-client spout's
            // default output (topic, partition, offset, key, value).
            for (String wd : tuple.getString(4).split(" |,")) {
                if (wd.isEmpty()) {
                    continue; // skip empty tokens produced by consecutive delimiters
                }
                counters.merge(wd, 1, Integer::sum);
                countern.merge(wd, 1, Integer::sum);
            }
            try {
                // One snapshot pair per tuple, mirroring the sample output format.
                fileWriter.write(counters.toString().replace("=", ":") + " --Total\n");
                fileWriter.write(countern.toString().replace("=", ":") + " --window\n");
                fileWriter.flush();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    @Override
    public void cleanup() {
        // Release the file handle when the topology is killed/deactivated;
        // the original leaked it.
        if (fileWriter != null) {
            try {
                fileWriter.close();
            } catch (IOException ignored) {
                // best-effort close on shutdown
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: emits nothing downstream.
    }
}
KafkaStormDemo.java
package org.ips.demo;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.base.BaseWindowedBolt.Duration;
import org.apache.storm.utils.Utils;
import java.util.concurrent.TimeUnit;
/**
 * Topology driver: wires a storm-kafka-client spout (topic IPS_CASH) to the
 * windowed word-count bolt with a 60-second tumbling window.
 *
 * With arguments: submits to the cluster under the class's simple name.
 * Without arguments: runs briefly in a LocalCluster for debugging.
 */
public class KafkaStormDemo {
    public static void main(String[] args) throws Exception {
        KafkaSpoutConfig.Builder<String, String> builder =
                KafkaSpoutConfig.builder("ipsnode1:9092,ipsnode2:9092,ipsnode3:9092", "IPS_CASH");
        builder.setGroupId("KAFKA_STORM");
        KafkaSpoutConfig<String, String> kafkaSpoutConfig = builder.build();

        TopologyBuilder tBuilder = new TopologyBuilder();
        tBuilder.setSpout("WordCountFileSpout",
                new KafkaSpout<String, String>(kafkaSpoutConfig), 1);
        // windowLength == slidingInterval (60s) => tumbling one-minute window.
        tBuilder.setBolt("readKafkaBolt",
                new KafkaBolt().withWindow(new Duration(60, TimeUnit.SECONDS),
                        new Duration(60, TimeUnit.SECONDS)))
                .shuffleGrouping("WordCountFileSpout");

        Config config = new Config();
        // Must exceed windowLength + slidingInterval, otherwise windowed
        // tuples time out and are replayed before the window fires.
        config.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, 240);

        String name = KafkaStormDemo.class.getSimpleName();
        if (args != null && args.length > 0) {
            config.setDebug(false);
            StormSubmitter.submitTopologyWithProgressBar(name, config, tBuilder.createTopology());
        } else {
            config.setDebug(true);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("KafkaStormDemo", config, tBuilder.createTopology());
            // Keep the local cluster alive longer than one full window (60s);
            // the original slept only 10s, killing the topology before any
            // window could ever fire, so local runs produced no output.
            Utils.sleep(130000);
            cluster.killTopology("KafkaStormDemo");
            cluster.shutdown();
        }
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.ips</groupId>
<artifactId>demo</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>demo</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<!-- storm-core is 'provided': the cluster already ships it on the worker
     classpath, so it must NOT be bundled into the topology jar. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.2.3</version>
<scope>provided</scope>
</dependency>
<!-- use new kafka spout code -->
<!-- storm-kafka-client (new-consumer spout) and kafka-clients ARE bundled,
     since workers do not provide them. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka-client</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.5.0</version>
</dependency>
</dependencies>
<build>
<finalName>KafkaStormDemo</finalName>
<plugins>
<!-- assembly plugin builds the fat "jar-with-dependencies" that
     'storm jar' uploads to nimbus. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.ips.demo.KafkaStormDemo</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
</project>
四.运行
[root@ipsnode1 test]# storm jar /home/test/kafkastormwindow.jar org.ips.demo.KafkaStormDemo test
Running: /usr/java/jdk1.8.0_251-amd64/bin/java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/usr/local/storm-1.2.3 -Dstorm.log.dir=/usr/local/storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /usr/local/storm-1.2.3/:/usr/local/storm-1.2.3/lib/:/usr/local/storm-1.2.3/extlib/*:/home/test/kafkastormwindow.jar:/usr/local/storm-1.2.3/conf:/usr/local/storm-1.2.3/bin -Dstorm.jar=/home/test/kafkastormwindow.jar -Dstorm.dependency.jars= -Dstorm.dependency.artifacts={} org.ips.demo.KafkaStormDemo test
493 [main] INFO o.a.s.k.s.KafkaSpoutConfig - Setting Kafka consumer property ‘auto.offset.reset’ to ‘earliest’ to ensure at-least-once processing
496 [main] INFO o.a.s.k.s.KafkaSpoutConfig - Setting Kafka consumer property ‘enable.auto.commit’ to ‘false’, because the spout does not support auto-commit
724 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old null
758 [main] INFO o.a.s.StormSubmitter - Generated ZooKeeper secret payload for MD5-digest: -8392794165370994696:-9016585152412442965
911 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : ipsnode1:6627
937 [main] INFO o.a.s.s.a.AuthUtils - Got AutoCreds []
941 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : ipsnode1:6627
983 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - jars…
990 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - artifacts…
990 [main] INFO o.a.s.StormSubmitter - Dependency Blob keys - jars : [] / artifacts : []
996 [main] INFO o.a.s.StormSubmitter - Uploading topology jar /home/test/kafkastormwindow.jar to assigned location: /storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar
Start uploading file ‘/home/test/kafkastormwindow.jar’ to ‘/storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar’ (14580159 bytes)
[==================================================] 14580159 / 14580159
File ‘/home/test/kafkastormwindow.jar’ uploaded to ‘/storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar’ (14580159 bytes)
1157 [main] INFO o.a.s.StormSubmitter - Successfully uploaded topology jar to assigned location: /storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar
1157 [main] INFO o.a.s.StormSubmitter - Submitting topology KafkaStormDemo in distributed mode with conf {“storm.zookeeper.topology.auth.scheme”:“digest”,“storm.zookeeper.topology.auth.payload”:"-8392794165370994696:-9016585152412442965",“topology.message.timeout.secs”:240,“topology.debug”:false}
1157 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old 1.2.3
1403 [main] INFO o.a.s.StormSubmitter - Finished submitting topology: KafkaStormDemo
五.Checking
kafka:
./kafka-console-producer.sh --broker-list ipsnode1:9092 --topic IPS_CASH
>111 222 333
>222,333
>333
>aaa,bbb,ccc
>bbb,ccc
>ccc
#间隔1min
>ccc
>bbb,ccc
>ccc,bbb,aaa
>111,333,222
>222,333
>333
>
data:
[root@ipsnode2 data]# more e0c958ad-2a0a-4483-9429-035fd8ab316e
{111:1, 222:1, 333:1} --Total
{111:1, 222:1, 333:1} --window
{111:1, 222:2, 333:2} --Total
{111:1, 222:2, 333:2} --window
{111:1, 222:2, 333:3} --Total
{111:1, 222:2, 333:3} --window
{aaa:1, 111:1, 222:2, ccc:1, 333:3, bbb:1} --Total
{aaa:1, 111:1, 222:2, ccc:1, 333:3, bbb:1} --window
{aaa:1, 111:1, 222:2, ccc:2, 333:3, bbb:2} --Total
{aaa:1, 111:1, 222:2, ccc:2, 333:3, bbb:2} --window
{aaa:1, 111:1, 222:2, ccc:3, 333:3, bbb:2} --Total
{aaa:1, 111:1, 222:2, ccc:3, 333:3, bbb:2} --window
{aaa:1, 111:1, 222:2, ccc:4, 333:3, bbb:2} --Total
{ccc:1} --window
{aaa:1, 111:1, 222:2, ccc:5, 333:3, bbb:3} --Total
{ccc:2, bbb:1} --window
{aaa:2, 111:1, 222:2, ccc:6, 333:3, bbb:4} --Total
{aaa:1, ccc:3, bbb:2} --window
{aaa:2, 111:2, 222:3, ccc:6, 333:4, bbb:4} --Total
{aaa:1, 111:1, ccc:3, 222:1, bbb:2, 333:1} --window
{aaa:2, 111:2, 222:4, ccc:6, 333:5, bbb:4} --Total
{aaa:1, 111:1, ccc:3, 222:2, bbb:2, 333:2} --window
{aaa:2, 111:2, 222:4, ccc:6, 333:6, bbb:4} --Total
{aaa:1, 111:1, ccc:3, 222:2, bbb:2, 333:3} --window
六.结束
[root@ipsnode1 test]# storm kill KafkaStormDemo
Running: /usr/java/jdk1.8.0_251-amd64/bin/java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/usr/local/storm-1.2.3 -Dstorm.log.dir=/usr/local/storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /usr/local/storm-1.2.3/:/usr/local/storm-1.2.3/lib/:/usr/local/storm-1.2.3/extlib/:/usr/local/storm-1.2.3/extlib-daemon/:/usr/local/storm-1.2.3/conf:/usr/local/storm-1.2.3/bin org.apache.storm.command.kill_topology KafkaStormDemo
2504 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : ipsnode1:6627
2559 [main] INFO o.a.s.c.kill-topology - Killed topology: KafkaStormDemo
[root@ipsnode1 test]#