Big Data Series 7: Storm Real-Time Stream Computing - Sliding Windows (Kafka to File)

1. Introduction

Installing and configuring Storm does not involve Hadoop and is fairly simple; walkthroughs are widely available online, so the steps are only listed briefly here.

2. Installation and Configuration

2.1 tar -zxvf apache-storm-1.2.3.tar.gz
2.2 vi /etc/profile
STORM_HOME=/mnt/data/software/storm
PATH=$PATH:$STORM_HOME/bin
export STORM_HOME PATH

2.3 vi storm.yaml
storm.zookeeper.servers:
  - "ipsnode1"
  - "ipsnode2"
  - "ipsnode3"

# nimbus
nimbus.seeds: ["ipsnode1", "ipsnode2"]   # HA: primary and standby nimbus
ui.port: 8081
supervisor.slots.ports:
  - 6700
  - 6701
  - 6702
  - 6703

storm.local.dir: "/storm/data"
2.4 Copy the configured Storm directory (and the /etc/profile changes) to the other nodes.
2.5 Start the daemons:
nohup storm nimbus &      # on the primary and standby nimbus nodes
nohup storm supervisor &  # on all other (worker) nodes
nohup storm ui &          # on the primary node
nohup storm logviewer &   # on the primary node

3. The Program

In the experiment described here, Kafka supplies the data; each message is a list of words separated by spaces or commas: word,word,word…
Storm's windowing is then used to count, for every one-minute window, how many times each word appears, together with a running total over all time, and the counts are written to a file. Because the window length equals the sliding interval, the window is effectively a tumbling window.
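As a quick aside, here is a minimal, self-contained sketch of the delimiter handling (the class name SplitDemo is purely illustrative, not part of the topology): the bolt below splits each message with the regex " |,", so spaces and commas may even be mixed within one message.

public class SplitDemo {
    public static void main(String[] args) {
        String message = "111 222,333";        // a sample Kafka message
        String[] words = message.split(" |,"); // split on a single space or a comma
        for (String w : words) {
            System.out.println(w);             // prints 111, 222, 333 on separate lines
        }
    }
}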

KafkaBolt.java

package org.ips.demo;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseWindowedBolt;
import org.apache.storm.windowing.TupleWindow;
import org.apache.storm.tuple.Tuple;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

public class KafkaBolt extends BaseWindowedBolt {
    private FileWriter fileWriter = null;
    private OutputCollector collector = null;
    private Map<String, Integer> counters; // word counts over all time
    private Map<String, Integer> countern; // word counts within the current window

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        // Runs once per task, unless the worker fails and is restarted.
        this.collector = outputCollector;
        this.counters = new HashMap<String, Integer>();
        this.countern = new HashMap<String, Integer>();
        try {
            fileWriter = new FileWriter("/storm/data/" + UUID.randomUUID());
            System.out.println("------******prepare ok");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void execute(TupleWindow inputWindow) {
        // Runs once per window trigger (here: every 60 seconds).
        System.out.println("------******execute");
        countern.clear(); // per-window counts start fresh on each trigger
        for (Tuple tuple : inputWindow.get()) {
            // The default KafkaSpout emits (topic, partition, offset, key, value),
            // so index 4 holds the message value.
            for (String wd : tuple.getString(4).split(" |,")) {
                if (!counters.containsKey(wd)) {
                    counters.put(wd, 1);
                } else {
                    counters.put(wd, counters.get(wd) + 1);
                }
                if (!countern.containsKey(wd)) {
                    countern.put(wd, 1);
                } else {
                    countern.put(wd, countern.get(wd) + 1);
                }
            }
            try {
                fileWriter.write(counters.toString().replace("=", ":") + " --Total\n");
                fileWriter.write(countern.toString().replace("=", ":") + " --window\n");
                fileWriter.flush();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Last bolt in the topology; nothing is emitted downstream.
    }
}
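An aside on the window semantics: the window above is driven by processing time (tuple arrival). BaseWindowedBolt in Storm 1.x can also window on event time. The fragment below is only a hedged illustration, reusing the Duration/TimeUnit imports from the topology class that follows, and assuming a hypothetical tuple field named "ts" that a custom spout or RecordTranslator would have to declare:

// Event-time variant (sketch): "ts" is an assumed field name, not part of this topology.
new KafkaBolt()
        .withWindow(new Duration(60, TimeUnit.SECONDS), new Duration(60, TimeUnit.SECONDS))
        .withTimestampField("ts")                                  // read event time from the "ts" field
        .withLag(new Duration(5, TimeUnit.SECONDS))                // tolerate tuples up to 5 s out of order
        .withWatermarkInterval(new Duration(1, TimeUnit.SECONDS)); // emit watermarks every second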

KafkaStormDemo.java

package org.ips.demo;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.base.BaseWindowedBolt.Duration;
import org.apache.storm.utils.Utils;
import java.util.concurrent.TimeUnit;

public class KafkaStormDemo {
    public static void main(String[] args) throws Exception {
        KafkaSpoutConfig.Builder<String, String> builder =
                KafkaSpoutConfig.builder("ipsnode1:9092,ipsnode2:9092,ipsnode3:9092", "IPS_CASH");
        builder.setGroupId("KAFKA_STORM");
        KafkaSpoutConfig<String, String> kafkaSpoutConfig = builder.build();

        TopologyBuilder tBuilder = new TopologyBuilder();
        tBuilder.setSpout("WordCountFileSpout", new KafkaSpout<String, String>(kafkaSpoutConfig), 1);
        // 60 s window with a 60 s slide: effectively a tumbling window.
        tBuilder.setBolt("readKafkaBolt",
                new KafkaBolt().withWindow(new Duration(60, TimeUnit.SECONDS),
                                           new Duration(60, TimeUnit.SECONDS)))
                .shuffleGrouping("WordCountFileSpout");

        Config config = new Config();
        // Must be larger than window length + sliding interval; Storm enforces this.
        config.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, 240);
        String name = KafkaStormDemo.class.getSimpleName();
        if (args != null && args.length > 0) {
            // Any argument means: submit to the cluster.
            config.setDebug(false);
            StormSubmitter.submitTopologyWithProgressBar(name, config, tBuilder.createTopology());
        } else {
            // No arguments: run briefly in local mode, then shut down.
            config.setDebug(true);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("KafkaStormDemo", config, tBuilder.createTopology());
            Utils.sleep(10000);
            cluster.killTopology("KafkaStormDemo");
            cluster.shutdown();
        }
    }
}
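Because the window length and the sliding interval are both 60 s, every tuple falls into exactly one window, which is what makes this a tumbling window. For a genuinely sliding window only the withWindow call changes; a minimal sketch with an assumed 10 s slide:

// Sliding variant: a 60 s window re-evaluated every 10 s.
tBuilder.setBolt("readKafkaBolt",
        new KafkaBolt().withWindow(new Duration(60, TimeUnit.SECONDS),   // window length
                                   new Duration(10, TimeUnit.SECONDS)))  // slide every 10 s
        .shuffleGrouping("WordCountFileSpout");

With overlapping windows each tuple is seen by up to six consecutive windows, so the bolt's cumulative counters map would over-count and the --Total line would need rework. Note also that Storm requires window length + sliding interval to fit within Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, which the setting of 240 s above satisfies (60 + 60 < 240).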

pom.xml

<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.ips</groupId>
  <artifactId>demo</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <name>demo</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>1.2.3</version>
      <scope>provided</scope>
    </dependency>
    <!-- use the new kafka spout (storm-kafka-client) -->
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-kafka-client</artifactId>
      <version>1.2.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>2.5.0</version>
    </dependency>
  </dependencies>

 <build>
        <finalName>KafkaStormDemo</finalName>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <archive>
                    <manifest>
                    <mainClass>org.ips.demo.KafkaStormDemo</mainClass>
                    </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
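For reference, this pom does not bind the assembly plugin to a lifecycle phase, so the fat jar has to be requested explicitly; something like the following should produce target/KafkaStormDemo-jar-with-dependencies.jar (presumably renamed to the kafkastormwindow.jar used below before submission):

mvn clean package assembly:single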

4. Running

[root@ipsnode1 test]# storm jar /home/test/kafkastormwindow.jar org.ips.demo.KafkaStormDemo test
Running: /usr/java/jdk1.8.0_251-amd64/bin/java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/usr/local/storm-1.2.3 -Dstorm.log.dir=/usr/local/storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /usr/local/storm-1.2.3/*:/usr/local/storm-1.2.3/lib/*:/usr/local/storm-1.2.3/extlib/*:/home/test/kafkastormwindow.jar:/usr/local/storm-1.2.3/conf:/usr/local/storm-1.2.3/bin -Dstorm.jar=/home/test/kafkastormwindow.jar -Dstorm.dependency.jars= -Dstorm.dependency.artifacts={} org.ips.demo.KafkaStormDemo test
493 [main] INFO o.a.s.k.s.KafkaSpoutConfig - Setting Kafka consumer property 'auto.offset.reset' to 'earliest' to ensure at-least-once processing
496 [main] INFO o.a.s.k.s.KafkaSpoutConfig - Setting Kafka consumer property 'enable.auto.commit' to 'false', because the spout does not support auto-commit
724 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old null
758 [main] INFO o.a.s.StormSubmitter - Generated ZooKeeper secret payload for MD5-digest: -8392794165370994696:-9016585152412442965
911 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : ipsnode1:6627
937 [main] INFO o.a.s.s.a.AuthUtils - Got AutoCreds []
941 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : ipsnode1:6627
983 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - jars…
990 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - artifacts…
990 [main] INFO o.a.s.StormSubmitter - Dependency Blob keys - jars : [] / artifacts : []
996 [main] INFO o.a.s.StormSubmitter - Uploading topology jar /home/test/kafkastormwindow.jar to assigned location: /storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar
Start uploading file '/home/test/kafkastormwindow.jar' to '/storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar' (14580159 bytes)
[==================================================] 14580159 / 14580159
File '/home/test/kafkastormwindow.jar' uploaded to '/storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar' (14580159 bytes)
1157 [main] INFO o.a.s.StormSubmitter - Successfully uploaded topology jar to assigned location: /storm/data/nimbus/inbox/stormjar-be40c5ee-bb57-437a-9b62-50a399f2c69f.jar
1157 [main] INFO o.a.s.StormSubmitter - Submitting topology KafkaStormDemo in distributed mode with conf {"storm.zookeeper.topology.auth.scheme":"digest","storm.zookeeper.topology.auth.payload":"-8392794165370994696:-9016585152412442965","topology.message.timeout.secs":240,"topology.debug":false}
1157 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old 1.2.3
1403 [main] INFO o.a.s.StormSubmitter - Finished submitting topology: KafkaStormDemo

5. Checking

kafka:
./kafka-console-producer.sh --broker-list ipsnode1:9092 --topic IPS_CASH

>111 222 333
>222,333
>333
>aaa,bbb,ccc
>bbb,ccc
>ccc
# pause for about one minute so the next window opens
>ccc
>bbb,ccc
>ccc,bbb,aaa
>111,333,222
>222,333
>333
>
data:
[root@ipsnode2 data]# more e0c958ad-2a0a-4483-9429-035fd8ab316e
{111:1, 222:1, 333:1} --Total
{111:1, 222:1, 333:1} --window
{111:1, 222:2, 333:2} --Total
{111:1, 222:2, 333:2} --window
{111:1, 222:2, 333:3} --Total
{111:1, 222:2, 333:3} --window
{aaa:1, 111:1, 222:2, ccc:1, 333:3, bbb:1} --Total
{aaa:1, 111:1, 222:2, ccc:1, 333:3, bbb:1} --window
{aaa:1, 111:1, 222:2, ccc:2, 333:3, bbb:2} --Total
{aaa:1, 111:1, 222:2, ccc:2, 333:3, bbb:2} --window
{aaa:1, 111:1, 222:2, ccc:3, 333:3, bbb:2} --Total
{aaa:1, 111:1, 222:2, ccc:3, 333:3, bbb:2} --window
{aaa:1, 111:1, 222:2, ccc:4, 333:3, bbb:2} --Total
{ccc:1} --window
{aaa:1, 111:1, 222:2, ccc:5, 333:3, bbb:3} --Total
{ccc:2, bbb:1} --window
{aaa:2, 111:1, 222:2, ccc:6, 333:3, bbb:4} --Total
{aaa:1, ccc:3, bbb:2} --window
{aaa:2, 111:2, 222:3, ccc:6, 333:4, bbb:4} --Total
{aaa:1, 111:1, ccc:3, 222:1, bbb:2, 333:1} --window
{aaa:2, 111:2, 222:4, ccc:6, 333:5, bbb:4} --Total
{aaa:1, 111:1, ccc:3, 222:2, bbb:2, 333:2} --window
{aaa:2, 111:2, 222:4, ccc:6, 333:6, bbb:4} --Total
{aaa:1, 111:1, ccc:3, 222:2, bbb:2, 333:3} --window

6. Shutdown

[root@ipsnode1 test]# storm kill KafkaStormDemo
Running: /usr/java/jdk1.8.0_251-amd64/bin/java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/usr/local/storm-1.2.3 -Dstorm.log.dir=/usr/local/storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /usr/local/storm-1.2.3/*:/usr/local/storm-1.2.3/lib/*:/usr/local/storm-1.2.3/extlib/*:/usr/local/storm-1.2.3/extlib-daemon/*:/usr/local/storm-1.2.3/conf:/usr/local/storm-1.2.3/bin org.apache.storm.command.kill_topology KafkaStormDemo
2504 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : ipsnode1:6627
2559 [main] INFO o.a.s.c.kill-topology - Killed topology: KafkaStormDemo
[root@ipsnode1 test]#
