首先附上 Storm 官网文档链接,本章内容主要参考官方文档。
第一步,在 pom.xml 中添加依赖:
<!-- 为了防止版本冲突,用 exclusion 将 hadoop-client 和 hadoop-auth 排除 -->
<!-- storm-hdfs provides HdfsBolt for writing Storm tuples into HDFS. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-hdfs</artifactId>
<version>${storm.version}</version>
<!-- Exclude the Hadoop client/auth jars pulled in transitively so they
     do not conflict with the Hadoop version already on the classpath. -->
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
</exclusion>
</exclusions>
</dependency>
在这里我们只需要编写一个 spout 产生数据,和一个 bolt 进行 wordcount 统计即可,逻辑与之前 Storm 整合 Redis 一致,这边直接贴代码。
编写第一个spout
public static class DataSourceSpout extends BaseRichSpout {
    // Collector used to emit tuples downstream; provided by Storm in open().
    private SpoutOutputCollector collector;

    // Fixed test vocabulary; one random word is emitted per nextTuple() cycle.
    public static final String[] words = new String[]{"apple","banana","orange","strawberry",};

    // Reuse a single Random across nextTuple() calls instead of allocating
    // a new instance on every emitted tuple (the original did the latter).
    private final Random random = new Random();

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // Keep a reference to the collector for use in nextTuple().
        this.collector = collector;
    }

    @Override
    public void nextTuple() {
        // Pick a random word from the test array to simulate a data source.
        String word = words[random.nextInt(words.length)];
        // Sleep one second so the console is not flooded during local testing.
        Utils.sleep(1000);
        // Emit the word; the value position matches the "word" field declared below.
        this.collector.emit(new Values(word));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare the single output field corresponding to the emitted value.
        declarer.declare(new Fields("word"));
    }
}
编写下一个bolt
public static class CountWords extends BaseRichBolt {
    // Collector used to emit (word, count) pairs; provided by Storm in prepare().
    private OutputCollector collector;

    // Running word counts, local to this bolt instance.
    private final Map<String, Integer> map = new HashMap<>();

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        // Keep a reference to the collector for use in execute().
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        // Grab the word sent by the upstream spout.
        String word = input.getStringByField("word");
        // Increment the running count; merge() replaces the original
        // null-check/increment/put sequence and avoids a second map lookup.
        Integer count = map.merge(word, 1, Integer::sum);
        // Print the pair so local test runs are easy to verify.
        System.out.println("emit : " + word + " " + count);
        // Forward the word and its updated count downstream.
        this.collector.emit(new Values(word, count));
        // Ack the input tuple so Storm's reliability tracking can retire it
        // (a no-op when the spout emits without message ids, but correct
        // practice for a BaseRichBolt).
        this.collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Field names here must match what downstream consumers read by name.
        declarer.declare(new Fields("word", "count"));
    }
}
重点在于main方法的编写:
结合官网:
public static void main(String[] args) {
    // Delimit fields with "|" when formatting tuples into HDFS records.
    RecordFormat recordFormat = new DelimitedRecordFormat().withFieldDelimiter("|");

    // Sync to HDFS after every 10 tuples (small value for easy testing).
    SyncPolicy syncPolicy = new CountSyncPolicy(10);

    // Rotate to a new file once the current one reaches 5 MB.
    FileRotationPolicy rotationPolicy =
            new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

    // Write output files under /foo/ on HDFS.
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/foo/");

    // Assemble the HdfsBolt from the pieces configured above.
    HdfsBolt hdfsBolt = new HdfsBolt()
            .withFsUrl("hdfs://localhost:8020")
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(recordFormat)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);

    // Wire spout -> count bolt -> hdfs bolt, shuffle grouping at each hop.
    TopologyBuilder topologyBuilder = new TopologyBuilder();
    topologyBuilder.setSpout("DataSourceSpout", new DataSourceSpout());
    topologyBuilder.setBolt("CountWords", new CountWords())
            .shuffleGrouping("DataSourceSpout");
    topologyBuilder.setBolt("HdfsBolt", hdfsBolt).shuffleGrouping("CountWords");

    // Run the topology on an in-process local cluster.
    LocalCluster localCluster = new LocalCluster();
    localCluster.submitTopology("LocalWCStormHDFSTop", new Config(), topologyBuilder.createTopology());
}
运行程序后,可在 HDFS 的 Web UI 中查看生成的文件;
也可以使用 hdfs dfs -cat 等命令查看文件内容。
这样我们的Storm整合HDFS就完成了