Storm-Druid 源码地址为:https://github.com/apache/storm/tree/master/external/storm-druid
要将 Storm 与 Druid 结合使用,需要在 pom.xml 文件中引入如下依赖:
<!-- https://mvnrepository.com/artifact/org.apache.storm/storm-druid -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-druid</artifactId>
<version>1.1.2</version>
</dependency>
举例代码如下:
SampleDruidBoltTopology 负责创建 Topology 结构,其中可以直接使用 storm-druid 扩展模块中的 DruidBeamBolt 组件。本例通过代码创建 Beam 即完成了任务启动配置的生成,因此不需要再额外加载 JSON 启动配置。
public class SampleDruidBoltTopology {

    /**
     * Entry point. The first argument is the ZooKeeper connect string;
     * an optional second argument is the topology name, which triggers a
     * remote submit instead of a 30-second local run.
     */
    public static void main(String[] args) throws Exception {
        if (args.length == 0) {
            throw new IllegalArgumentException("There should be at least one argument. Run as `SampleDruidBoltTopology <zk-url>`");
        }

        // Wire spout -> Druid bolt; tuples Druid discards are routed to a printer bolt.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("event-gen", new SimpleSpout(), 5);

        DruidBeamFactory beamFactory = new SampleDruidBeamFactoryImpl(new HashMap<String, Object>());
        DruidConfig.Builder druidConfigBuilder =
                DruidConfig.newBuilder().discardStreamId(DruidConfig.DEFAULT_DISCARD_STREAM_ID);
        ITupleDruidEventMapper<Map<String, Object>> eventMapper =
                new TupleDruidEventMapper<>(TupleDruidEventMapper.DEFAULT_FIELD_NAME);
        DruidBeamBolt<Map<String, Object>> druidBolt =
                new DruidBeamBolt<Map<String, Object>>(beamFactory, eventMapper, druidConfigBuilder);

        builder.setBolt("druid-bolt", druidBolt).shuffleGrouping("event-gen");
        builder.setBolt("printer-bolt", new PrinterBolt())
               .shuffleGrouping("druid-bolt" , DruidConfig.DEFAULT_DISCARD_STREAM_ID);

        Config conf = new Config();
        conf.setDebug(true);
        // The beam factory reads this key to reach ZooKeeper.
        conf.put("druid.tranquility.zk.connect", args[0]);

        if (args.length > 1) {
            // Remote submission: args[1] is the topology name.
            conf.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(args[1], conf, builder.createTopology());
        } else {
            // Local run: keep the topology alive for 30 seconds, then exit.
            conf.setMaxTaskParallelism(3);
            try (LocalCluster cluster = new LocalCluster();
                 LocalTopology topo = cluster.submitTopology("druid-test", conf, builder.createTopology());) {
                Thread.sleep(30000);
            }
            System.exit(0);
        }
    }

    /** Prints every incoming tuple to stdout; declares no output streams. */
    private static class PrinterBolt extends BaseBasicBolt {
        @Override
        public void execute(Tuple tuple, BasicOutputCollector collector) {
            System.out.println(tuple);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer ofd) {
        }
    }
}
SampleDruidBeamFactoryImpl 类需要实现 DruidBeamFactory 接口,主要通过 Druid 的底层 API 来灵活地生成任务的启动配置。
public class SampleDruidBeamFactoryImpl implements DruidBeamFactory<Map<String, Object>> {

    // Configuration handed in by the topology; available for config values if needed.
    Map<String, Object> factoryConf = null;

    public SampleDruidBeamFactoryImpl(Map<String, Object> factoryConf) {
        this.factoryConf = factoryConf; // This can be used to pass config values
    }

    /**
     * Builds a Tranquility Beam that ingests event maps into the "test"
     * datasource, with "publisher"/"advertiser" dimensions and a "click"
     * count aggregator at minute query granularity.
     */
    @Override
    public Beam<Map<String, Object>> makeBeam(Map<?, ?> conf, IMetricsContext metrics) {
        final String overlordService = "druid/overlord";        // Your overlord's druid.service
        final String curatorDiscoveryPath = "/druid/discovery"; // Your overlord's druid.discovery.curator.path
        final String dataSource = "test";
        final List<String> dimensions = ImmutableList.of("publisher", "advertiser");
        final List<AggregatorFactory> aggregators =
                ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("click"));

        // Tranquility must be able to pull a timestamp out of each event
        // object (here a Map<String, Object> keyed by "timestamp").
        final Timestamper<Map<String, Object>> timestamper = new Timestamper<Map<String, Object>>() {
            @Override
            public DateTime timestamp(Map<String, Object> theMap) {
                return new DateTime(theMap.get("timestamp"));
            }
        };

        // Coordination goes through ZooKeeper (via Curator); the connect
        // string is taken from the Storm configuration.
        final CuratorFramework curator = CuratorFrameworkFactory
                .builder()
                .connectString((String) conf.get("druid.tranquility.zk.connect"))
                .retryPolicy(new ExponentialBackoffRetry(1000, 20, 30000))
                .build();
        curator.start();

        // By default Druid expects the serialized JSON to carry an ISO8601
        // field named "timestamp".
        final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);

        // JSON serialization is handled by Jackson unless an alternate
        // serializer is supplied via .objectWriter(...); we rely on the default.
        return DruidBeams
                .builder(timestamper)
                .curator(curator)
                .discoveryPath(curatorDiscoveryPath)
                .location(DruidLocation.create(overlordService, dataSource))
                .timestampSpec(timestampSpec)
                .rollup(DruidRollup.create(DruidDimensions.specific(dimensions), aggregators, QueryGranularities.MINUTE))
                .tuning(ClusteredBeamTuning
                        .builder()
                        .segmentGranularity(Granularity.HOUR)
                        .windowPeriod(new Period("PT10M"))
                        .partitions(1)
                        .replicants(1)
                        .build())
                .druidBeamConfig(DruidBeamConfig
                        .builder()
                        .indexRetryPeriod(new Period("PT10M"))
                        .build())
                .buildBeam();
    }
}
另外,Spout 组件可以根据自己的需求实现不同数据源的数据获取,并将数据发送到下游。
具体例子参考地址为:
https://github.com/apache/storm/tree/master/external/storm-druid/src/test/java/org/apache/storm/druid