kafka-storm-es这条线其实就两个结点,kafka-storm还行,storm-es我主要是参考了
https://blog.csdn.net/yl3395017/article/details/77496034?utm_source=gold_browser_extension这篇博客。由于原文缺少不少细节,而且我的代码也有一些改动,这里把自己踩过的坑分享出来,希望对你有用。
我用的storm是1.1.0版本的,es是5.6.10版本的。
我的pom.xml文件:
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka-client</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.9.0.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.10.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-elasticsearch</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>net.sf.json-lib</groupId>
<artifactId>json-lib</artifactId>
<version>2.4</version>
<classifier>jdk15</classifier>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>5.6.1</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>5.6.1</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>x-pack-transport</artifactId>
<version>5.6.1</version>
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-elasticsearch</artifactId>
<version>3.0.2.RELEASE</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>cn.ljh.storm.helloworld.ExclamationTopology</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
改写的ES中的4个类有:
public abstract class NewAbstractEsBolt extends BaseRichBolt {
private static final Logger LOG = LoggerFactory.getLogger(NewAbstractEsBolt.class);
protected static Client client;
protected OutputCollector collector;
private NewEsConfig esConfig;
public NewAbstractEsBolt(NewEsConfig esConfig) {
this.esConfig = esConfig;
}
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
try {
this.collector = outputCollector;
synchronized (NewAbstractEsBolt.class) {
if (client == null) {
client = new NewStormElasticSearchClient(esConfig).construct();
}
}
} catch (Exception e) {
LOG.warn("unable to initialize EsBolt ", e);
}
}
public abstract void execute(Tuple tuple);
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
}
static Client getClient() {
return NewAbstractEsBolt.client;
}
static void replaceClient(Client client) {
NewAbstractEsBolt.client = client;
}
}
public class NewEsConfig implements Serializable {
private final String clusterName;
private final String[] nodes;
private final Map<String, String> additionalConfiguration;
public NewEsConfig(String clusterName, String[] nodes) {
this(clusterName, nodes, Collections.<String, String>emptyMap());
}
public NewEsConfig(String clusterName, String[] nodes, Map<String, String> additionalConfiguration) {
this.clusterName = clusterName;
this.nodes = nodes;
this.additionalConfiguration = new HashMap(additionalConfiguration);
}
TransportAddress[] getTransportAddresses() {
String[] ns = nodes;
TransportAddress[] addressArr = new TransportAddress[ns.length];
for (int i = 0; i < ns.length; i++) {
try {
addressArr[i] = new InetSocketTransportAddress(InetAddress.getByName(ns[i]), 9300);
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
return addressArr;
}
Settings toBasicSettings() {
return Settings.builder()
.put("cluster.name",clusterName)
.put("transport.tcp.compress", true)
.build();
}
}
public class NewEsIndexBolt extends NewAbstractEsBolt {
private final EsTupleMapper tupleMapper;
public NewEsIndexBolt(NewEsConfig esConfig, EsTupleMapper tupleMapper) {
super(esConfig);
this.tupleMapper = tupleMapper;
}
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
super.prepare(map, topologyContext, outputCollector);
}
public void execute(Tuple tuple) {
try {
String source = tupleMapper.getSource(tuple);
//如果想要定期产生index,在此改动,比如 new SimpleDateFormat("YYYYMMddHH").format(new Date()).toString();按小时生成,在前方bolt传过来倒是不行,具体原因不知。
String index = tupleMapper.getIndex(tuple);
String type = tupleMapper.getType(tuple);
String id = tupleMapper.getId(tuple);
client.prepareIndex(index, type, id).setSource(source).execute().actionGet();
collector.ack(tuple);
} catch (Exception e) {
collector.reportError(e);
collector.fail(tuple);
}
}
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
}
}
public final class NewStormElasticSearchClient implements Serializable {
private final NewEsConfig esConfig;
public NewStormElasticSearchClient(NewEsConfig esConfig) {
this.esConfig = esConfig;
}
public Client construct() {
Settings settings = esConfig.toBasicSettings();
TransportClient transportClient =new PreBuiltXPackTransportClient(settings).addTransportAddresses(esConfig.getTransportAddresses());
return transportClient;
}
}
在storm-es这一步,需要获取source、index、type、id四个字段,但不能把它们直接插入到json串中。可以在中间加入一个Bolt传入所需字段,如下;也可以直接在上一个Bolt提交的时候传入。我这里是未整合之前的代码,希望能让你更清晰地了解流程:
public class ReportBolt extends BaseRichBolt {
private OutputCollector collector;
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
}
public void execute(Tuple input) {
String message = input.getString(0);
System.err.println(message);
this.collector.emit(new Values(message, "demo", "xx",UUID.randomUUID().toString()));
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("source", "index", "type", "id"));
}
}
下面是拓扑结构:
public class Topology {
private static final String TOPOLOGY_NAME = "SPAN-DATA-TOPOLOGY";
private static final String KAFKA_SPOUT_ID = "kafka-stream";
public static void main(String[] args) {
ReportBolt report = new ReportBolt();
//根据自身kafka的路径改写,注意Hosts文件中的ip映射
String zks = "k-dev-01:2181/kafka"; // zk主机
String topic = "ntc_ftp_log";
String zkRoot = "/storm";
BrokerHosts brokerHosts = new ZkHosts(zks);
SpoutConfig spoutConf = new SpoutConfig(brokerHosts, topic, zkRoot, KAFKA_SPOUT_ID);
spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
spoutConf.zkServers = Arrays.asList(new String[] { "k-dev-01", "k-dev-02", "k-dev-03" });
spoutConf.zkPort = 2181;
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout(KAFKA_SPOUT_ID, new KafkaSpout(spoutConf), 1);
builder.setBolt("Report", report).globalGrouping(KAFKA_SPOUT_ID);
//传入的是Es集群名称以及各个节点
NewEsConfig esConfig = new NewEsConfig("k18_index",new String[]{ "k-dev-01","k-dev-02","k-dev-03","k-dev-04","k-dev-05" });
EsTupleMapper tupleMapper = new DefaultEsTupleMapper();
NewEsIndexBolt indexBolt = new NewEsIndexBolt(esConfig, tupleMapper);
builder.setBolt("storm-es-bolt", indexBolt, 1).globalGrouping("Report");
Config config = new Config();
config.setNumWorkers(1);
try {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
} catch (Exception e) {
e.printStackTrace(); }}}
第一次写博客,理由很单纯:我也是一个小码农,很多东西都是从CSDN上找到答案的。程序员很辛苦,实话实说,我希望自己也能给其他程序员一些帮助,仅此而已。希望这篇文章能帮到大家。