文章目录
前言
Kafka是一种高吞吐量的分布式发布订阅消息系统,它可以处理消费者在网站中的所有动作流数据。
一、获取Kafka安装包
wget http://mirror.bit.edu.cn/apache/kafka/2.3.1/kafka_2.11-2.3.1.tgz
二、安装步骤
1.解压文件
tar -zxvf kafka_2.11-2.3.1.tgz
2.重命名
mv kafka_2.11-2.3.1 kafka
3.设置环境变量
vim /etc/profile
export KAFKA_HOME=/opt/kafka
export PATH=$PATH:$KAFKA_HOME/bin
4.使环境变量生效
source /etc/profile
三、启动Kafka
1.先启动ZooKeeper服务
bin/zookeeper-server-start.sh config/zookeeper.properties
2.启动kafka服务
bin/kafka-server-start.sh config/server.properties
3.创建一个名为"topTest"的topic
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic topTest
4.打开Producer(生产者)服务
bin/kafka-console-producer.sh --broker-list localhost:9092 --topic topTest
5.打开Consumer(消费者)服务
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic topTest --from-beginning
四、Java连接Kafka
1.maven依赖
<properties>
    <java.version>1.8</java.version>
    <flink.version>1.6.1</flink.version>
    <scala.binary.version>2.11</scala.binary.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- flink kafka connector -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
2.实体类
package com.flink.model;
import java.util.Map;
/**
 * Plain data object describing one metric sample that is serialized to JSON
 * and sent to Kafka: a metric name, a millisecond timestamp, numeric field
 * values, and descriptive tags.
 */
public class Metric {
    public String name;
    // Epoch milliseconds; callers in this file set it via System.currentTimeMillis().
    public long timestamp;
    // Numeric measurements, e.g. "used_percent" -> 90.0.
    public Map<String, Object> fields;
    // Descriptive labels, e.g. "host_ip" -> "localhost".
    public Map<String, String> tags;

    /** No-arg constructor kept for JSON (de)serialization frameworks. */
    public Metric() {
    }

    public Metric(String name, long timestamp, Map<String, Object> fields, Map<String, String> tags) {
        this.name = name;
        this.timestamp = timestamp;
        this.fields = fields;
        this.tags = tags;
    }

    @Override
    public String toString() {
        // Fix: timestamp is a long, not a string, so it is no longer wrapped
        // in single quotes (the original printed timestamp='123...').
        return "Metric{" +
                "name='" + name + '\'' +
                ", timestamp=" + timestamp +
                ", fields=" + fields +
                ", tags=" + tags +
                '}';
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

    public Map<String, Object> getFields() {
        return fields;
    }

    public void setFields(Map<String, Object> fields) {
        this.fields = fields;
    }

    public Map<String, String> getTags() {
        return tags;
    }

    public void setTags(Map<String, String> tags) {
        this.tags = tags;
    }
}
3.连接工具类
package com.flink.kafka;
import com.alibaba.fastjson.JSON;
import com.flink.model.Metric;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Small demo producer: builds a sample {@link Metric} every 300 ms and
 * publishes it to Kafka as a JSON string.
 */
public class KafkaUtils {
    public static final String broker_list = "node1:9092";
    // Kafka topic; must match the topic the Flink consumer job reads.
    public static final String topic = "topTest";

    /**
     * Builds one sample "mem" Metric and sends it to {@code topic} as JSON.
     *
     * @throws InterruptedException declared for signature compatibility with callers
     */
    public static void writeToKafka() throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", broker_list);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");   // key serializer
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); // value serializer

        Metric metric = new Metric();
        metric.setTimestamp(System.currentTimeMillis());
        metric.setName("mem");
        Map<String, String> tags = new HashMap<>();
        Map<String, Object> fields = new HashMap<>();
        tags.put("cluster", "阵雨");
        tags.put("host_ip", "localhost");
        fields.put("used_percent", 90d);
        fields.put("max", 27244873d);
        fields.put("used", 17244873d);
        fields.put("init", 27244873d);
        metric.setTags(tags);
        metric.setFields(fields);

        String json = JSON.toJSONString(metric);
        // Fix: use typed generics instead of raw KafkaProducer/ProducerRecord, and
        // close the producer via try-with-resources so sockets and buffers are
        // released (the original leaked one producer per call in an endless loop).
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            ProducerRecord<String, String> record = new ProducerRecord<>(topic, null, null, json);
            producer.send(record);
            System.out.println("发送数据: " + json);
            producer.flush();
        }
        // NOTE(review): creating a producer per message is expensive; if throughput
        // matters, hoist the producer out of the loop in main() and reuse it.
    }

    public static void main(String[] args) throws InterruptedException {
        // Emit one sample metric roughly every 300 ms, forever.
        while (true) {
            Thread.sleep(300);
            writeToKafka();
        }
    }
}
4.运行
五、Flink连接读取Kafka
1.主方法类
package com.flink.kafka;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;
// Flink streaming job that consumes the "topTest" Kafka topic and prints each record.
public class Main {
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Properties props = new Properties();
props.put("bootstrap.servers", "node1:9092"); // Kafka broker; must match KafkaUtils.broker_list
props.put("zookeeper.connect", "node1:2181"); // NOTE(review): likely unused by the 0.11 consumer — TODO confirm it can be removed
props.put("group.id", "metric-group");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); // key deserializer
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); // value deserializer
props.put("auto.offset.reset", "latest"); // start from the newest offsets when no committed offset exists
DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
"topTest", // Kafka topic; must match the topic KafkaUtils writes to
new SimpleStringSchema(), // deserialize each record value as a plain String
props)).setParallelism(1);
dataStreamSource.print(); // print every record read from Kafka to the console
env.execute("Flink add data source");
}
}
2.运行
总结
本文总结了linux安装kafka的步骤,以及Flink通过Java连接读取Kafka的方式。