kafka-storm-es

kafka-storm-es这条线其实就两个节点,kafka-storm还行,storm-es我主要是参考了

https://blog.csdn.net/yl3395017/article/details/77496034?utm_source=gold_browser_extension这篇博客,因为里面缺失了很多东西,而且我的代码也有些改动,在这里将自己走过的坑分享一下,希望对君有用。

我用的storm是1.1.0版本的,es服务端是5.6.10版本的(注意:下面pom中引入的es客户端依赖是5.6.1,同属5.6.x小版本,可以兼容)。

我的pom.xml文件:

<dependencies>

         <!-- NOTE(review): junit 3.8.1 is very old; JUnit 4/5 is recommended for new code -->
         <dependency>

              <groupId>junit</groupId>

              <artifactId>junit</artifactId>

              <version>3.8.1</version>

              <scope>test</scope>

         </dependency>

         <!-- NOTE(review): when submitting to a real Storm cluster, storm-core should be
              <scope>provided</scope>, otherwise jar-with-dependencies bundles a second copy
              of Storm and the worker fails to start. For LocalCluster runs (as in this blog)
              the default compile scope is what makes it work. -->
         <dependency>

              <groupId>org.apache.storm</groupId>

              <artifactId>storm-core</artifactId>

              <version>1.1.0</version>

         </dependency>

         <!-- Old ZooKeeper-based Kafka spout (ZkHosts/SpoutConfig) — this is the one the
              Topology class below actually uses -->
         <dependency>

              <groupId>org.apache.storm</groupId>

              <artifactId>storm-kafka</artifactId>

              <version>1.1.0</version>

         </dependency>

         <!-- New consumer-API spout; NOTE(review): not referenced by the code in this post,
              only one of storm-kafka / storm-kafka-client is normally needed -->
         <dependency>

              <groupId>org.apache.storm</groupId>

              <artifactId>storm-kafka-client</artifactId>

              <version>1.1.0</version>

         </dependency>

         <!-- Zookeeper/slf4j/log4j are excluded to avoid classpath clashes with storm-core's
              own logging and ZK versions -->
         <dependency>

              <groupId>org.apache.kafka</groupId>

              <artifactId>kafka_2.11</artifactId>

              <version>0.9.0.0</version>

              <exclusions>

                   <exclusion>

                       <groupId>org.apache.zookeeper</groupId>

                       <artifactId>zookeeper</artifactId>

                   </exclusion>

                   <exclusion>

                       <groupId>org.slf4j</groupId>

                       <artifactId>slf4j-log4j12</artifactId>

                   </exclusion>

                  <exclusion>

                       <groupId>log4j</groupId>

                       <artifactId>log4j</artifactId>

                   </exclusion>

              </exclusions>

         </dependency>

         <!-- NOTE(review): kafka-clients 0.10.0.1 vs kafka_2.11 0.9.0.0 is a version
              mismatch — verify against the broker version actually deployed -->
         <dependency>

              <groupId>org.apache.kafka</groupId>

              <artifactId>kafka-clients</artifactId>

              <version>0.10.0.1</version>

         </dependency>

         <!-- Base classes (EsTupleMapper, DefaultEsTupleMapper, ...) that the New* classes
              below are adapted from -->
         <dependency>

              <groupId>org.apache.storm</groupId>

              <artifactId>storm-elasticsearch</artifactId>

              <version>1.1.0</version>

         </dependency>

         <dependency>

              <groupId>net.sf.json-lib</groupId>

              <artifactId>json-lib</artifactId>

              <version>2.4</version>

              <classifier>jdk15</classifier>

         </dependency>

         <!-- NOTE(review): 5.6.1 client against the 5.6.10 server mentioned in the blog —
              compatible within the 5.6.x line, but keeping them identical is safer -->
         <dependency>

              <groupId>org.elasticsearch</groupId>

              <artifactId>elasticsearch</artifactId>

              <version>5.6.1</version>

         </dependency>

         <dependency>

              <groupId>org.elasticsearch.client</groupId>

              <artifactId>transport</artifactId>

              <version>5.6.1</version>

         </dependency>

         <!-- NOTE(review): x-pack-transport is hosted on the Elastic Maven repository
              (https://artifacts.elastic.co/maven), which must be declared in <repositories>
              or the build will fail to resolve this artifact -->
         <dependency>

              <groupId>org.elasticsearch.client</groupId>

              <artifactId>x-pack-transport</artifactId>

              <version>5.6.1</version>

         </dependency>

         <dependency>

              <groupId>org.springframework.data</groupId>

              <artifactId>spring-data-elasticsearch</artifactId>

              <version>3.0.2.RELEASE</version>

         </dependency>

     </dependencies>

     <build>

         <plugins>

              <plugin>

                   <artifactId>maven-assembly-plugin</artifactId>

                   <configuration>

                       <descriptorRefs>

                            <descriptorRef>jar-with-dependencies</descriptorRef>

                       </descriptorRefs>

                       <archive>

                            <manifest>

                                 <!-- NOTE(review): this mainClass does not match the Topology
                                      class shown in this post — update it to your own
                                      topology's fully-qualified class name before packaging -->
                                 <mainClass>cn.ljh.storm.helloworld.ExclamationTopology</mainClass>

                            </manifest>

                       </archive>

                   </configuration>

                   <executions>

                       <execution>

                            <id>make-assembly</id>

                            <phase>package</phase>

                            <goals>

                                 <goal>single</goal>

                            </goals>

                       </execution>

                   </executions>

              </plugin>

         </plugins>

     </build>

改写的ES中的4个类有:

public abstract class NewAbstractEsBolt extends BaseRichBolt {

     private static final Logger LOG = LoggerFactory.getLogger(NewAbstractEsBolt.class);

     protected static Client client;

     protected OutputCollector collector;

     private NewEsConfig esConfig;

     public NewAbstractEsBolt(NewEsConfig esConfig) {

         this.esConfig = esConfig;

     }

     public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {

         try {

              this.collector = outputCollector;

              synchronized (NewAbstractEsBolt.class) {

                   if (client == null) {

                       client = new NewStormElasticSearchClient(esConfig).construct();

                   }

              }

         } catch (Exception e) {

              LOG.warn("unable to initialize EsBolt ", e);

         }

     }

     public abstract void execute(Tuple tuple);

     public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

     }

     static Client getClient() {

         return NewAbstractEsBolt.client;

     }

     static void replaceClient(Client client) {

         NewAbstractEsBolt.client = client;

     }

}

public class NewEsConfig implements Serializable {

     private final String clusterName;

     private final String[] nodes;

     private final Map<String, String> additionalConfiguration;

     public NewEsConfig(String clusterName, String[] nodes) {

         this(clusterName, nodes, Collections.<String, String>emptyMap());

     }

     public NewEsConfig(String clusterName, String[] nodes, Map<String, String> additionalConfiguration) {

         this.clusterName = clusterName;

         this.nodes = nodes;

         this.additionalConfiguration = new HashMap(additionalConfiguration);

     }

 

     TransportAddress[] getTransportAddresses() {

         String[] ns = nodes;

         TransportAddress[] addressArr = new TransportAddress[ns.length];

         for (int i = 0; i < ns.length; i++) {

              try {

                   addressArr[i] = new InetSocketTransportAddress(InetAddress.getByName(ns[i]), 9300);

              } catch (UnknownHostException e) {

                   e.printStackTrace();

              }

         }

         return addressArr;

     }

     Settings toBasicSettings() {

         return Settings.builder()

                   .put("cluster.name",clusterName)

                   .put("transport.tcp.compress", true)

                   .build();

     }

}

public class NewEsIndexBolt extends NewAbstractEsBolt {

     private final EsTupleMapper tupleMapper;

     public NewEsIndexBolt(NewEsConfig esConfig, EsTupleMapper tupleMapper) {

         super(esConfig);

         this.tupleMapper = tupleMapper;

     }

     public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {

         super.prepare(map, topologyContext, outputCollector);

     }

     public void execute(Tuple tuple) {

         try {

              String source = tupleMapper.getSource(tuple);

//如果想要定期产生index,在此改动,比如 new SimpleDateFormat("YYYYMMddHH").format(new Date()).toString();按小时生成,在前方bolt传过来倒是不行,具体原因不知。

              String index = tupleMapper.getIndex(tuple);

              String type = tupleMapper.getType(tuple);

              String id = tupleMapper.getId(tuple);

              client.prepareIndex(index, type, id).setSource(source).execute().actionGet();

              collector.ack(tuple);

         } catch (Exception e) {

              collector.reportError(e);

              collector.fail(tuple);

         }

     }

     public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

     }

}

public final class NewStormElasticSearchClient implements Serializable {

    private final NewEsConfig esConfig;

    public NewStormElasticSearchClient(NewEsConfig esConfig) {

        this.esConfig = esConfig;

    }

    public Client construct() {

        Settings settings = esConfig.toBasicSettings();

        TransportClient transportClient =new PreBuiltXPackTransportClient(settings).addTransportAddresses(esConfig.getTransportAddresses());

        return transportClient;

    }

}

在storm-es时,需要获取source,index,type,id四个字段,但不能直接插入到json串中,可以在中间加入一个Bolt传入所需字段,如下。也可直接在上个bolt中提交的时候传入,我这是未整合之前的代码,希望能更清晰让君了解:

public class ReportBolt extends BaseRichBolt {

     private OutputCollector collector;

     public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {

         this.collector = collector;

     }

 

     public void execute(Tuple input) {

         String message = input.getString(0);

         System.err.println(message);

         this.collector.emit(new Values(message, "demo", "xx",UUID.randomUUID().toString()));

     }

     public void declareOutputFields(OutputFieldsDeclarer declarer) {

         declarer.declare(new Fields("source", "index", "type", "id"));

     }

}

下面是拓扑结构:

public class Topology {

     private static final String TOPOLOGY_NAME = "SPAN-DATA-TOPOLOGY";

     private static final String KAFKA_SPOUT_ID = "kafka-stream";

     public static void main(String[] args) {

         ReportBolt report = new ReportBolt();

//根据自身kafka的路径改写,注意Hosts文件中的ip映射

         String zks = "k-dev-01:2181/kafka"; // zk主机

         String topic = "ntc_ftp_log";

         String zkRoot = "/storm";

         BrokerHosts brokerHosts = new ZkHosts(zks);

         SpoutConfig spoutConf = new SpoutConfig(brokerHosts, topic, zkRoot, KAFKA_SPOUT_ID);

         spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());

         spoutConf.zkServers = Arrays.asList(new String[] { "k-dev-01", "k-dev-02", "k-dev-03" });

         spoutConf.zkPort = 2181;

         TopologyBuilder builder = new TopologyBuilder();

         builder.setSpout(KAFKA_SPOUT_ID, new KafkaSpout(spoutConf), 1);

         builder.setBolt("Report", report).globalGrouping(KAFKA_SPOUT_ID);

//传入的是Es集群名称以及各个节点

         NewEsConfig esConfig = new NewEsConfig("k18_index",new String[]{  "k-dev-01","k-dev-02","k-dev-03","k-dev-04","k-dev-05" });

         EsTupleMapper tupleMapper = new DefaultEsTupleMapper();

         NewEsIndexBolt indexBolt = new NewEsIndexBolt(esConfig, tupleMapper);

         builder.setBolt("storm-es-bolt", indexBolt, 1).globalGrouping("Report");

         Config config = new Config();

         config.setNumWorkers(1);

         try {

              LocalCluster cluster = new LocalCluster();

              cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());

         } catch (Exception e) {

              e.printStackTrace(); }}}

第一次写博客,理由很单纯,我也是一个小码农,很多东西都是从CSDN上找到答案的,程序员很辛苦,实话实说,我希望自己也能给其他程序员一些帮助,仅此而已。希望能给大家一些帮助就好。

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值