Flink 1.7.2 + Kafka: Receiving a Stream and Printing It to the Console

1. Test Environment

Windows: IntelliJ IDEA (for developing and running the demo)

Linux: Kafka and ZooKeeper

2. POM and Demo

2.1 The pom.xml file

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
		 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.test</groupId>
	<artifactId>flinkTestConsumeKafka</artifactId>
	<version>1.0-SNAPSHOT</version>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<flink.version>1.7.2</flink.version>
		<slf4j.version>1.7.7</slf4j.version>
		<log4j.version>1.2.17</log4j.version>
	</properties>

	<dependencies>
		<!--******************* flink *******************-->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-java</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-streaming-java_2.11</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-clients_2.11</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-kafka-0.10_2.11</artifactId>
			<version>${flink.version}</version>
			<scope>compile</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-filesystem_2.11</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-core</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>2.7.3</version>
		</dependency>

		<!--alibaba fastjson-->
		<dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.51</version>
		</dependency>
		<!--******************* 日志 *******************-->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>${slf4j.version}</version>
			<scope>runtime</scope>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>${log4j.version}</version>
			<scope>runtime</scope>
		</dependency>
		<!--******************* kafka *******************-->
		<dependency>
			<groupId>org.apache.kafka</groupId>
			<artifactId>kafka-clients</artifactId>
			<version>1.1.1</version>
		</dependency>

	</dependencies>

	<build>
		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.3</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
			<!-- build a fat jar -->
			<plugin>
				<artifactId>maven-assembly-plugin</artifactId>
				<configuration>
					<archive>
						<manifest>
							<mainClass>flink.kafkaFlink.KafkaDemo</mainClass>
						</manifest>
					</archive>
					<descriptorRefs>
						<descriptorRef>jar-with-dependencies</descriptorRef>
					</descriptorRefs>
				</configuration>
			</plugin>
		</plugins>
	</build>


</project>
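With the assembly plugin configured above, a runnable fat jar can be built with the following invocation (assembly:single is the goal that plugin provides; no execution is bound to the package phase in this POM, so the goal is called explicitly):

mvn clean package assembly:single

The jar lands in target/flinkTestConsumeKafka-1.0-SNAPSHOT-jar-with-dependencies.jar, named after the artifactId and version declared in this POM.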

2.2 The Java classes

2.2.1 The KafkaDemo class

package flink.kafkaFlink;

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;

public class KafkaDemo {

	public static void main(String[] args) throws Exception {

		// Set up the streaming execution environment.
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Checkpointing is disabled by default. Calling enableCheckpointing(n) on the
		// StreamExecutionEnvironment enables it with a checkpoint interval of n
		// milliseconds; here a checkpoint is started every 500 ms.
		env.enableCheckpointing(500);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		Properties properties = new Properties();
		// Kafka broker IP addresses or hostnames, comma-separated if there are several.
		properties.setProperty("bootstrap.servers", "singlecluster:9092");
		// ZooKeeper IP addresses or hostnames, comma-separated; only the legacy 0.8
		// consumer actually reads this property, it is kept here for reference.
		properties.setProperty("zookeeper.connect", "singlecluster:2181");
		// group.id of the Flink consumer.
		properties.setProperty("group.id", "test-consumer-group");

		// "apache-flink-test" is the topic created on the Kafka broker (section 3.2).
		FlinkKafkaConsumer010<String> myConsumer =
				new FlinkKafkaConsumer010<String>("apache-flink-test", new SimpleStringSchema(), properties);
		myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

		// Consume the records sent by the Kafka producer; this example applies no
		// transformation and prints each record straight to the console.
		DataStream<String> keyedStream = env.addSource(myConsumer);
		keyedStream.print();

		// Execute the program.
		env.execute("Flink Streaming Java API Skeleton");
	}
}
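The console producer from section 3.3 is enough for manual testing; to generate test input from code, a minimal producer sketch like the one below can be used. It relies only on the kafka-clients dependency already declared in the POM; the class name KafkaDemoProducer is made up for this example, while the broker address and topic name match the demo above. It sends records in the "timestamp,payload" form that the CustomWatermarkEmitter below expects.

package flink.kafkaFlink;

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaDemoProducer {

	public static void main(String[] args) {
		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "singlecluster:9092");
		props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
		props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

		// try-with-resources closes the producer, which flushes pending records on exit.
		try (KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props)) {
			for (int i = 0; i < 5; i++) {
				// Leading epoch-millis timestamp, then a comma, then the payload --
				// the format parsed by CustomWatermarkEmitter on the Flink side.
				String value = System.currentTimeMillis() + ",message-" + i;
				producer.send(new ProducerRecord<String, String>("apache-flink-test", value));
			}
		}
	}
}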


2.2.2 The CustomWatermarkEmitter class

package flink.kafkaFlink;

import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

/**
 * Assigns event timestamps and punctuated watermarks. Records are expected in
 * the form "timestamp,payload", where the leading field is an epoch timestamp
 * in milliseconds.
 */
public class CustomWatermarkEmitter implements AssignerWithPunctuatedWatermarks<String> {

    private static final long serialVersionUID = 1L;

    // Parse the leading timestamp field; fall back to 0 for malformed records.
    public long extractTimestamp(String element, long previousElementTimestamp) {
        if (null != element && element.contains(",")) {
            String[] parts = element.split(",");
            return Long.parseLong(parts[0]);
        }
        return 0;
    }

    // Emit a watermark for every record that carries a timestamp; returning
    // null means no new watermark is generated for this record.
    public Watermark checkAndGetNextWatermark(String lastElement, long extractedTimestamp) {
        if (null != lastElement && lastElement.contains(",")) {
            String[] parts = lastElement.split(",");
            return new Watermark(Long.parseLong(parts[0]));
        }
        return null;
    }
}
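As a quick sanity check of the parsing logic, the small sketch below (the class name and the sample record values are made up for illustration) runs one well-formed and one malformed record through the assigner:

package flink.kafkaFlink;

import org.apache.flink.streaming.api.watermark.Watermark;

public class WatermarkEmitterCheck {

    public static void main(String[] args) {
        CustomWatermarkEmitter emitter = new CustomWatermarkEmitter();

        // Well-formed record: the leading field becomes the event timestamp,
        // and a watermark with the same value is emitted.
        long ts = emitter.extractTimestamp("1554100000000,hello", -1L);
        Watermark wm = emitter.checkAndGetNextWatermark("1554100000000,hello", ts);
        System.out.println(ts); // 1554100000000
        System.out.println(wm); // Watermark @ 1554100000000

        // Malformed record (no comma): the timestamp falls back to 0 and no
        // watermark is generated.
        System.out.println(emitter.extractTimestamp("no-timestamp", -1L)); // 0
        System.out.println(emitter.checkAndGetNextWatermark("no-timestamp", -1L)); // null
    }
}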


3. Starting the Services on the Linux Host

3.1 Start ZooKeeper and Kafka (here, the ZooKeeper bundled with the Kafka distribution is used)

cd /home/bigdata/kafka

3.1.1 Start ZooKeeper (in the background)

nohup bin/zookeeper-server-start.sh config/zookeeper.properties &

Check whether ZooKeeper is running:

jps

6740 QuorumPeerMain

To stop ZooKeeper:

bin/zookeeper-server-stop.sh


3.1.2 Start Kafka (in the background)

nohup bin/kafka-server-start.sh config/server.properties &

or, to discard all console output:

nohup bin/kafka-server-start.sh config/server.properties >/dev/null 2>&1 &

Check whether Kafka is running:

jps

7587 Kafka

To stop Kafka:

bin/kafka-server-stop.sh


3.2 Create the Kafka topic

/home/bigdata/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic apache-flink-test


3.3 Start a Kafka console producer

/home/bigdata/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic apache-flink-test
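Because CustomWatermarkEmitter treats everything before the first comma as an epoch timestamp in milliseconds, enter records in the "timestamp,payload" form, for example (the values below are made up):

1554100000000,hello
1554100000500,world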


3.4 Common Kafka commands

(1) Create a topic:

  /home/bigdata/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic apache-flink-test

(2) List topics:

  /home/bigdata/kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181

(3) Start a console producer:

  /home/bigdata/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic apache-flink-test

(4) Start a console consumer:

/home/bigdata/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic apache-flink-test --from-beginning


After consuming from a Kafka topic, the consumer process should be shut down:

(1) When using the console consumer, press Ctrl + C to stop it.

(2) Otherwise, look up the consumer's process ID with jps -m and kill the corresponding process:

jps -m

kill -9 <pid>

(5) Delete a topic:

  /home/bigdata/kafka/bin/kafka-topics.sh --delete --zookeeper <zookeeper server:port> --topic <topic name>

  (delete.topic.enable=true must be set in server.properties)


4. Running the KafkaDemo Program

4.1 Enter some records in the Kafka producer console on the Linux server

4.2 Check the IDEA console: the records entered in the producer are printed there
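For example, a record typed into the producer as 1554100000000,hello should appear in the IDEA console roughly as the line below; the numeric prefix is the index of the print subtask and depends on the job's parallelism:

2> 1554100000000,hello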


Reference:

Flink+Kafka 接收流数据并打印到控制台 - 吹静静 - 博客园
https://www.cnblogs.com/chuijingjing/p/10535081.html
