1. Test Environment
Windows: IntelliJ IDEA
Linux: Kafka, ZooKeeper
2. POM and Demo
2.1 The pom.xml file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.test</groupId>
<artifactId>flinkTestConsumeKafka</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<flink.version>1.7.2</flink.version>
<slf4j.version>1.7.7</slf4j.version>
<log4j.version>1.2.17</log4j.version>
</properties>
<dependencies>
<!--******************* flink *******************-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.10_2.11</artifactId>
<version>${flink.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-filesystem_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.3</version>
</dependency>
<!--alibaba fastjson-->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.51</version>
</dependency>
<!--******************* logging *******************-->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<!--******************* kafka *******************-->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>1.1.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!--build a jar with dependencies-->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>flink.kafkaFlink.KafkaDemo</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
</project>
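Note that the assembly plugin above is not bound to any lifecycle phase, so a plain mvn package only produces the thin jar; to get the jar-with-dependencies, invoke the goal explicitly, for example mvn clean package assembly:single.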
2.2 Java Classes
2.2.1 The KafkaDemo class
package flink.kafkaFlink;

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;

public class KafkaDemo {
    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Checkpointing is disabled by default. To enable it, call enableCheckpointing(n) on the
        // StreamExecutionEnvironment, where n is the checkpoint interval in milliseconds;
        // here a new checkpoint is triggered every 500 ms.
        env.enableCheckpointing(500);
        // Use event time so the watermark assigner below takes effect.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "singlecluster:9092"); // Kafka broker IPs or hostnames, comma separated
        properties.setProperty("zookeeper.connect", "singlecluster:2181"); // ZooKeeper nodes, comma separated (only required by the Kafka 0.8 consumer)
        properties.setProperty("group.id", "test-consumer-group");         // group.id of the Flink consumer

        // "apache-flink-test" is the Kafka topic created in section 3.2.
        FlinkKafkaConsumer010<String> myConsumer =
                new FlinkKafkaConsumer010<String>("apache-flink-test", new SimpleStringSchema(), properties);
        myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

        // Read the records sent by the Kafka producer; this example applies no further processing.
        DataStream<String> keyedStream = env.addSource(myConsumer);
        // Print the records received from the producer directly to the console.
        keyedStream.print();

        // Execute the program.
        env.execute("Flink Streaming Java API Skeleton");
    }
}
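The comment on enableCheckpointing above only covers the interval. As a minimal sketch of further checkpoint tuning (optional; the setters come from Flink's CheckpointConfig, and the timeout/pause values are illustrative assumptions, not part of the original demo):

package flink.kafkaFlink;

import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointConfigSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(500); // checkpoint interval in ms, as in KafkaDemo
        // Exactly-once is already the default mode; set explicitly here for clarity.
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // Abort a checkpoint that does not complete within one minute (illustrative value).
        env.getCheckpointConfig().setCheckpointTimeout(60000);
        // Leave at least 250 ms between the end of one checkpoint and the start of the next (illustrative value).
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(250);
    }
}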
2.2.2 The CustomWatermarkEmitter class
package flink.kafkaFlink;

import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

// Expects records of the form "<epoch-millis>,<payload>" and emits a watermark
// for every record that carries a parsable timestamp.
public class CustomWatermarkEmitter implements AssignerWithPunctuatedWatermarks<String> {
    private static final long serialVersionUID = 1L;

    @Override
    public long extractTimestamp(String element, long previousElementTimestamp) {
        if (null != element && element.contains(",")) {
            String[] parts = element.split(",");
            return Long.parseLong(parts[0]);
        }
        return 0;
    }

    @Override
    public Watermark checkAndGetNextWatermark(String lastElement, long extractedTimestamp) {
        if (null != lastElement && lastElement.contains(",")) {
            String[] parts = lastElement.split(",");
            return new Watermark(Long.parseLong(parts[0]));
        }
        return null;
    }
}
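The assigner assumes each Kafka record looks like "<epoch-millis>,<payload>", e.g. 1552323600000,hello; records without a comma fall back to timestamp 0 and produce no watermark. A tiny standalone check (illustrative only, not part of the job):

package flink.kafkaFlink;

public class WatermarkEmitterCheck {
    public static void main(String[] args) {
        CustomWatermarkEmitter emitter = new CustomWatermarkEmitter();
        // A record in the expected "<epoch-millis>,<payload>" form:
        System.out.println(emitter.extractTimestamp("1552323600000,hello", -1L)); // prints 1552323600000
        // A record without the delimiter yields no watermark:
        System.out.println(emitter.checkAndGetNextWatermark("no-delimiter", -1L)); // prints null
    }
}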
3. Start the Services on the Linux Host
3.1 Start ZooKeeper and Kafka (here, the ZooKeeper bundled with the Kafka distribution is used)
cd /home/bigdata/kafka
3.1.1 Start ZooKeeper (in the background)
nohup bin/zookeeper-server-start.sh config/zookeeper.properties &
Check that ZooKeeper is running:
jps
6740 QuorumPeerMain
Stop ZooKeeper:
bin/zookeeper-server-stop.sh
3.1.2 Start Kafka (in the background)
nohup bin/kafka-server-start.sh config/server.properties &
or, discarding all output:
nohup bin/kafka-server-start.sh config/server.properties >/dev/null 2>&1 &
Check that Kafka is running:
jps
7587 Kafka
Stop Kafka:
bin/kafka-server-stop.sh
3.2 Create a Kafka topic
/home/bigdata/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic apache-flink-test
3.3 Start a Kafka console producer
/home/bigdata/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic apache-flink-test
3.4 Common Kafka commands
(1) Create a topic:
/home/bigdata/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic apache-flink-test
(2) List topics:
/home/bigdata/kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181
(3) Console producer:
/home/bigdata/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic apache-flink-test
(4) Console consumer:
/home/bigdata/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic apache-flink-test --from-beginning
After you are done consuming a topic, shut the consumer process down, either by
(1) stopping the console consumer with Ctrl+C, or
(2) finding the consumer's process id with jps -m and killing it:
jps -m
kill -9 <pid>
(5) Delete a topic:
/home/bigdata/kafka/bin/kafka-topics.sh --delete --zookeeper <zookeeper server:port> --topic <topic name>
(server.properties must set delete.topic.enable=true; otherwise the topic is only marked for deletion.)
4. Run KafkaDemo
4.1 Type some records in the Kafka producer console on the Linux server
4.2 Check the IDEA console
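For example, typing lines that match the watermark assigner's "<epoch-millis>,<payload>" format into the producer console:

1552323600000,hello
1552323700000,world

should make the IDEA console print each record back, prefixed with the printing subtask's index when the print sink runs with parallelism greater than 1 (e.g. 2> 1552323600000,hello). The exact prefix depends on your local parallelism, and the sample timestamps here are made up for illustration.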
Reference:
Flink+Kafka 接收流数据并打印到控制台 - 吹静静 - 博客园
https://www.cnblogs.com/chuijingjing/p/10535081.html