Use the Java API to create a Kafka producer that sends message data to the defined topic.
Note: to simulate the full flow of Kafka receiving message data and sending it onward, this program must be started first.
package com.xnmzdx.storm.kafka;

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class WordProduce {

    public static void main(String[] args) {
        // Create the configuration object
        Properties props = new Properties();
        // Set the broker connection
        props.put("bootstrap.servers", "storm01:9092");
        // Set the acknowledgement mode. all: all in-sync replicas acknowledge,
        // 1: the leader acknowledges, 0: no acknowledgement
        props.put("acks", "1");
        // Set the key serializer class
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Set the value serializer class
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Create the producer
        Producer<String, String> producer = new KafkaProducer<String, String>(props);
        // Send each word as a separate message
        for (String word : message.split(" ")) {
            // Build the message record for the target topic
            ProducerRecord<String, String> record = new ProducerRecord<String, String>("words_topic", word);
            // Send the message
            producer.send(record);
        }
        // Close the producer
        producer.close();
    }

    // The message data to send, defined by hand
    private static String message =
            "One morning, when Gregor Samsa woke from troubled dreams, "
            + "he found himself transformed in his bed into a horrible "
            + "vermin. He lay on his armour-like back, and if he lifted "
            + "his head a little he could see his brown belly, slightly "
            + "domed and divided by arches into stiff sections.";

    /*private static String message2 =
            "hello storm hello hadoop hello scala "
            + "i love you storm and hadoop "
            + "i learn hadoop and scala";*/
}
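If you want to confirm that each record actually reached the broker, the Kafka client's send method also accepts a callback. The sketch below is a hypothetical variant of WordProduce (the class name WordProduceWithCallback and the sample sentence are illustrative); it assumes the same storm01:9092 broker and words_topic as above, and uses the standard Callback/RecordMetadata types from the org.apache.kafka.clients.producer package.

package com.xnmzdx.storm.kafka;

import java.util.Properties;

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public class WordProduceWithCallback {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "storm01:9092"); // same broker as above
        props.put("acks", "1");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        Producer<String, String> producer = new KafkaProducer<String, String>(props);
        for (String word : "He lay on his armour-like back.".split(" ")) {
            ProducerRecord<String, String> record =
                    new ProducerRecord<String, String>("words_topic", word);
            // The callback fires once the broker acknowledges (or rejects) the record
            producer.send(record, new Callback() {
                public void onCompletion(RecordMetadata metadata, Exception exception) {
                    if (exception != null) {
                        exception.printStackTrace();
                    } else {
                        System.out.println("Acked: partition " + metadata.partition()
                                + ", offset " + metadata.offset());
                    }
                }
            });
        }
        producer.close();
    }
}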
Storm pulls the message data from Kafka and processes it: it checks whether the received message data forms a complete sentence; complete sentences are printed, incomplete ones are not.
A message stream is judged to form a complete sentence when a word ends with "." (an ASCII period); once a sentence is complete, it is emitted downstream.
package com.xnmzdx.storm.kafka;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;

import com.google.common.collect.ImmutableList;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;

public class SentenceBolt extends BaseBasicBolt {

    private static final long serialVersionUID = -6793074915131597295L;

    // Accumulates the words received so far
    private List<String> words = new ArrayList<String>();

    public void execute(Tuple input, BasicOutputCollector collector) {
        // Receive a word
        String word = input.getString(0);
        // Skip blank input
        if (StringUtils.isBlank(word)) {
            return;
        }
        System.out.println("Received word:" + word);
        // Add the word to the list
        words.add(word);
        // A word ending with "." marks the end of the sentence
        if (word.endsWith(".")) {
            // Join the accumulated words with spaces and emit the sentence.
            // StringUtils.join concatenates a collection into one string with the given separator.
            collector.emit(ImmutableList.of((Object) StringUtils.join(words, ' ')));
            // Clear the list for the next sentence
            words.clear();
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare the field name of the emitted data
        declarer.declare(new Fields("sentence"));
    }
}
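The accumulate-and-join logic above can be tried outside Storm. The sketch below (a hypothetical SentenceJoinDemo class with a made-up word stream, not part of the topology) runs the same algorithm: words collect in a list until one ends with ".", then the list is joined with spaces and cleared. Note that because SentenceBolt keeps this list as state across tuples, the topology must route every word to a single bolt instance, which is why KafkaTopology below uses globalGrouping and a parallelism of 1.

package com.xnmzdx.storm.kafka;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;

public class SentenceJoinDemo {

    public static void main(String[] args) {
        String[] stream = {"He", "lay", "on", "his", "back.", "I", "learn", "hadoop."};
        List<String> words = new ArrayList<String>();
        for (String word : stream) {
            words.add(word);
            // A trailing "." closes the sentence, exactly as in SentenceBolt.execute
            if (word.endsWith(".")) {
                System.out.println(StringUtils.join(words, ' '));
                words.clear();
            }
        }
        // Prints:
        // He lay on his back.
        // I learn hadoop.
    }
}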
Print the received data.
package com.xnmzdx.storm.kafka;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;

public class PrinterBolt extends BaseBasicBolt {

    private static final long serialVersionUID = 34881452771257392L;

    public void execute(Tuple input, BasicOutputCollector collector) {
        // Receive a sentence
        String sentence = input.getString(0);
        // Print it
        System.out.println("Received Sentence:" + sentence);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: nothing to declare
    }
}
Storm's topology class. Run this class to start consuming data from Kafka and processing it.
package com.xnmzdx.storm.kafka;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;

import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

/**
 * Storm and Kafka integration
 * @author zyt
 */
public class KafkaTopology {

    public static void main(String[] args) {
        // The ZooKeeper addresses used by Kafka
        ZkHosts zkHosts = new ZkHosts("storm01:2181,storm02:2181,storm03:2181");
        // 2nd argument: the Kafka topic; 3rd argument: the ZK root path under which
        // the Kafka spout stores its read position (offset); 4th argument: the spout's unique id
        SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "words_topic", "", "group1");
        // Decode Kafka messages with StringScheme
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        // Consume from the beginning of the topic
        kafkaConfig.forceFromStart = true;

        TopologyBuilder builder = new TopologyBuilder();
        // Set the Kafka spout as the data source
        builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
        builder.setBolt("SentenceBolt", new SentenceBolt(), 1).globalGrouping("KafkaSpout");
        builder.setBolt("PrinterBolt", new PrinterBolt(), 1).globalGrouping("SentenceBolt");

        LocalCluster cluster = new LocalCluster();
        Config conf = new Config();
        cluster.submitTopology("KafkaTopology", conf, builder.createTopology());
        System.out.println("Waiting to consume from kafka");
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        cluster.killTopology("KafkaTopology");
        cluster.shutdown();
    }
}
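LocalCluster is only suitable for testing: the topology runs in-process for ten seconds and is then killed. To deploy to a real Storm cluster you would submit with StormSubmitter instead. Below is a minimal sketch of that variant (the class name KafkaTopologySubmit, the helper method, and the worker count are illustrative); it assumes the TopologyBuilder is assembled exactly as in KafkaTopology.main, and uses the standard backtype.storm.StormSubmitter API.

package com.xnmzdx.storm.kafka;

import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;

public class KafkaTopologySubmit {

    // Hypothetical cluster-mode submission; builder is assembled as in KafkaTopology.main
    static void submit(TopologyBuilder builder) {
        Config conf = new Config();
        conf.setNumWorkers(2); // hypothetical worker count, tune for your cluster
        try {
            StormSubmitter.submitTopology("KafkaTopology", conf, builder.createTopology());
        } catch (AlreadyAliveException e) {
            // A topology with this name is already running on the cluster
            e.printStackTrace();
        } catch (InvalidTopologyException e) {
            // The topology structure failed validation
            e.printStackTrace();
        }
    }
}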