一、安装storm集群
1、解压 tar zxvf apache-storm-0.9.3.tar.gz
2、修改Storm配置文件storm.yaml
#zookeeper 集群的地址 zookeeper集群安装参考 https://my.oschina.net/xiaozhou18/blog/787132
storm.zookeeper.servers:
- "node22"
- "node33"
- "node44"
nimbus.host: "node11" #storm主节点机器ip
storm.local.dir: "/usr/local/java/storm/tmp" #storm 本地状态目录,存放运行时的jar包、配置和临时数据
supervisor.slots.ports:
- 6700
- 6701
- 6702
- 6703
3、配置好的storm 复制到其他几台机器上
4、启动storm集群 首先要保证zookeeper集群先起来
在nimbus节点执行"nohup bin/storm nimbus >/dev/null 2>&1 &"启动Nimbus后台程序,并放到后台执行
在supervisor节点执行"nohup bin/storm supervisor >/dev/null 2>&1 &"启动Supervisor后台程序,并放到后台执行;
在nimbus节点执行"nohup bin/storm ui >/dev/null 2>&1 &"启动UI后台程序,并放到后台执行,启动后可以通过http://{nimbus host}:8080观察集群的worker资源使用情况、Topologies的运行状态等信息。
在所有节点执行"nohup bin/storm logviewer >/dev/null 2>&1 &"启动log后台程序,并放到后台执行,启动后可以通过http://node11:8000观察日志信息。(nimbus节点可以不用启动logviewer进程,因为logviewer进程主要是为了方便查看任务的执行日志,这些执行日志都在supervisor节点上。)
5、通过浏览器登录监控UI页面,查看监控属性;http://node11:8080/index.html
6、提交topology作业./storm jar jar包路径 main类的完整类名(含包名)
如 ./storm jar /usr/local/java/firststorm.jar com.xiaozhou.stormdemo.firstdemo.CountTopoly
当supervisor自动停了的时候,执行 find / -name supervisor 和 find / -name workers,把找出来的这两个文件夹删掉,再重新启动supervisor
二、java代码简单构建一个topology
1、pom.xml文件内容
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.xiaozhou.stormkafka</groupId>
<artifactId>stormkafka</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>stormkafka</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- NOTE(review): these artifacts are Storm 1.0.1 (org.apache.storm packages),
     but the install notes above use apache-storm-0.9.3 and the first demo
     imports backtype.storm.* (pre-1.0 package names) - confirm which Storm
     version this project actually targets. -->
<!-- Storm <-> Kafka integration: provides KafkaSpout, SpoutConfig, ZkHosts, etc. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka</artifactId>
<version>1.0.1</version>
</dependency>
<!-- Core Storm API: topologies, spouts, bolts, LocalCluster. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.0.1</version>
</dependency>
<!-- Apache Commons IO: FileUtils used by the WordCount spout. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<!-- Kafka broker/client classes (Scala 2.11 build). -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.9.0.1</version>
<exclusions>
<!-- Excluded to avoid a second SLF4J binding on the classpath
     (presumably conflicting with the logging shipped by storm-core). -->
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
2、代码
package com.xiaozhou.stormdemo.seconddemo;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.StormTopology;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class WordCountTopology {
    /**
     * Spout that scans E:\wordcount for *.txt files, emits every line as a
     * one-field tuple ("lines"), then renames each processed file (appends a
     * timestamp) so the next scan does not re-read it.
     */
    public static class WordSpout extends BaseRichSpout {
        private SpoutOutputCollector collector;

        @Override
        public void open(Map conf, TopologyContext context,
                SpoutOutputCollector collector) {
            this.collector = collector;
        }

        @Override
        public void nextTuple() {
            // listFiles args: directory to scan, accepted file extensions,
            // whether to recurse into subdirectories.
            Collection<File> files = FileUtils.listFiles(new File("E:\\wordcount"), new String[]{"txt"}, true);
            for (File file : files) {
                try {
                    // Read with an explicit charset: the no-charset overload is
                    // deprecated and depends on the platform default encoding.
                    List<String> lines = FileUtils.readLines(file, "UTF-8");
                    for (String line : lines) {
                        System.out.println("发射的数据是======="+line);
                        collector.emit(new Values(line));
                        // Throttle emission to roughly one line per second.
                        Thread.sleep(1000);
                    }
                    // Rename the file (timestamp suffix) so it is skipped on the next scan.
                    FileUtils.moveFile(file, new File(file.getAbsolutePath() + System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    // Restore the interrupt flag instead of swallowing it,
                    // and stop scanning so the worker can shut down promptly.
                    Thread.currentThread().interrupt();
                    return;
                } catch (Exception e) {
                    // Best-effort demo: log and continue with the next file.
                    e.printStackTrace();
                }
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("lines"));
        }
    }

    /**
     * Bolt that splits each incoming line on tab characters and emits one
     * tuple per word on field "words".
     */
    public static class SplitCountBolt extends BaseRichBolt {
        private OutputCollector collector;

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        @Override
        public void execute(Tuple input) {
            String line = input.getStringByField("lines");
            // Words are assumed to be tab-separated within a line.
            for (String word : line.split("\t")) {
                collector.emit(new Values(word));
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("words"));
        }
    }

    /**
     * Terminal bolt that keeps a running count per word and prints the whole
     * count table after every update (demo-style output; emits nothing).
     */
    public static class CountBolt extends BaseRichBolt {
        // Running word counts; lives for the lifetime of this bolt instance.
        private final Map<String, Integer> map = new HashMap<String, Integer>();

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            // No state to initialize; this bolt never emits, so the collector is unused.
        }

        @Override
        public void execute(Tuple input) {
            String word = input.getStringByField("words");
            // Single lookup instead of get-then-put on separate keys.
            Integer count = map.get(word);
            map.put(word, count == null ? 1 : count + 1);
            // Iterate entries directly to avoid a second lookup per key.
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                System.out.println(entry.getKey() + "==========" + entry.getValue());
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // Sink bolt: declares no output fields.
        }
    }

    /**
     * Wires spout -> split -> count and runs the topology in-process on a
     * LocalCluster (no real Storm cluster required).
     */
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("wordspout", new WordSpout());
        builder.setBolt("SplitCountBolt", new SplitCountBolt()).shuffleGrouping("wordspout");
        builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitCountBolt");
        StormTopology topology = builder.createTopology();
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("wordcount", new Config(), topology);
    }
}
三、kafka和storm结合简单使用
package com.xiaozhou.stormkafka.stormkafka;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import java.util.Map;
public class StormKafkaDemo {
public static class PrintBolt extends BaseRichBolt{
private OutputCollector collector;
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector=collector;
}
public void execute(Tuple input) {
String value = input.getString(0);
System.out.println(value);
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
public static void main(String[] args) {
//zookeeper集群de地址
ZkHosts zkHosts=new ZkHosts("node22:2181,node33:2181,node44:2181");
//zkHosts zookeeper集群地址
//kafkastormtopic 消费kafka哪个topic的信息
// "" kafka 在zk中的root
// id1 消费组
SpoutConfig config=new SpoutConfig(zkHosts,"kafkastormtopic","","id1");
//指定kafka消息类型为String类型
config.scheme= new SchemeAsMultiScheme(new StringScheme());
TopologyBuilder builder=new TopologyBuilder();
builder.setSpout("kafkaspout",new KafkaSpout(config));
builder.setBolt("printbolt",new PrintBolt()).shuffleGrouping("kafkaspout");
Config conf=new Config();
LocalCluster localCluster=new LocalCluster();
localCluster.submitTopology("kafkastorm",conf,builder.createTopology());
System.out.println("等待数据..........");
}
}