1.新建一个maven项目——myflink。
2.导入pom.xml依赖
<!-- Shared build/version properties: Java 8 bytecode, UTF-8 sources.
     flink.version / kafka.version are referenced by the dependencies below
     so all Flink (resp. Kafka) artifacts stay on one consistent version. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<flink.version>1.7.2</flink.version>
<kafka.version>2.0.0</kafka.version>
</properties>
<!-- NOTE: the "_2.11" suffix on several artifactIds is the Scala binary
     version. It must be identical across every Flink/Kafka artifact and
     must match the Scala version used to compile the project; mixing
     2.11 and 2.12 artifacts fails at runtime with binary-incompatibility
     errors. -->
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-shaded-hadoop-2-uber -->
<!-- Hadoop classes shaded for Flink; its version is pinned independently
     of flink.version (format: <hadoop-version>-<flink-shaded-version>). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-hadoop-2-uber</artifactId>
<version>2.4.1-9.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
<!-- "Universal" Kafka connector (Kafka >= 0.10, new in Flink 1.7). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>${kafka.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
</dependencies>
3.创建ReadKafka.scala
package cn.alisa.myflink.exp
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
// Consumes the Kafka topic "user_friends_raw", where each record is expected
// to look like "userId,friendId friendId friendId ...", and fans every record
// out into individual (userId, friendId) pairs printed to stdout.
object ReadKafka {
  def main(args: Array[String]): Unit = {
    // Flink streaming execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Kafka consumer configuration. Use classOf[...].getName instead of
    // hard-coded class-name strings so the already-imported
    // StringDeserializer is checked at compile time.
    val prop = new Properties()
    prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.21.130:9092")
    prop.put(ConsumerConfig.GROUP_ID_CONFIG, "alisa")
    prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)
    prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)

    // Read the topic from the earliest offset, deserializing each record
    // as a plain UTF-8 string.
    val ds = env.addSource(
      new FlinkKafkaConsumer[String](
        "user_friends_raw",
        new SimpleStringSchema(),
        prop
      ).setStartFromEarliest()
    )

    ds
      // Drop malformed records: a trailing comma means an empty friend list
      // (what the original ",$" regex rejected), and a record without two
      // comma-separated fields would make info(1) below throw
      // ArrayIndexOutOfBoundsException. endsWith avoids re-compiling a
      // regex for every record.
      .filter(line => !line.endsWith(",") && line.split(",").length >= 2)
      .flatMap { line =>
        val info = line.split(",")
        // One (userId, friendId) tuple per space-separated friend id.
        info(1).split(" ").map((info(0), _))
      }
      .print()

    // Lazily-built pipeline only runs once execute() is called.
    env.execute("myfk")
  }
}
在运行该程序之前,需要先将 ZooKeeper 和 Kafka 服务开启:
zkServer.sh start
kafka-server-start.sh /opt/software/kafka211/config/server.properties
运行结果如下: