1. Flink连接kafka前需要准备的步骤
1.1 安装kafka
1.2 本篇采用IDEA编译器,请自行准备好环境(记得引入依赖)
IDEA项目需要准备的pom.xml如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Parent POM for the Flink + Kafka tutorial project.
     All Flink artifacts are Scala 2.11 / Flink 1.10.2 - keep these versions in sync. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>a</groupId>
<artifactId>a</artifactId>
<packaging>pom</packaging>
<version>1.0-SNAPSHOT</version>
<modules>
<module>NetworkFlowAnalysis</module>
</modules>
<dependencies>
<!-- MySQL JDBC driver -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.6</version>
</dependency>
<!-- Flink CEP library (Scala API) -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-cep-scala_2.11</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.10.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.10.2</version>
</dependency>
<!-- Kafka 0.11 connector, matching the FlinkKafkaConsumer011 used in the code below -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.10.2</version>
</dependency>
<!-- Redis connector from Apache Bahir; flink-streaming-java is excluded to avoid
     a version clash with the Flink 1.10.2 artifacts above -->
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Build a fat jar ("jar-with-dependencies") during the package phase -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
2. 消费kafka生产的数据
首先创建流处理环境,并创建Properties配置对象来设置kafka相关参数;然后添加Kafka数据源——由于是消费数据,所以在addSource方法中创建消费者,最后打印输出。
// Configure the Kafka source: bootstrap.servers points at the broker
// (hostname "master", or the VM's address; Kafka must be installed there).
val kafkaProps = new Properties()
kafkaProps.setProperty("bootstrap.servers", "master:9092")
// Consume the "order" topic, deserializing each record as a plain string.
val stream = env.addSource(
  new FlinkKafkaConsumer011[String]("order", new SimpleStringSchema(), kafkaProps))
2.1 读取kafka的数据,解析为样例类并输出
// Parse raw Kafka lines into AdClickLog records.
// NOTE(review): only lines starting with "L" are kept — confirm this matches
// the producer's record format.
val adLogStream = stream.filter(
action => action.startsWith("L"))
.map(data => {
val arr = data.split(",")
// Event time is taken from the wall clock, not parsed from the record.
val curTime = System.currentTimeMillis()
// NOTE(review): arr(10) requires at least 11 comma-separated fields and is
// inconsistent with arr(1)..arr(3); it looks like a typo for arr(0) —
// verify against the input schema before changing.
AdClickLog(arr(10), arr(1).toLong, arr(2), arr(3), curTime)
})
// Timestamps come from System.currentTimeMillis() at parse time, so they are
// monotonically non-decreasing and ascending-timestamp assignment is safe.
.assignAscendingTimestamps(_.timestamp )
2.2自己定义处理数据和统计数据
// Windowed aggregation: count clicks per userId and emit one result per window.
// NOTE(review): timeWindow(3s, 3s) is a sliding window whose slide equals its
// size, i.e. it behaves like a 3-second tumbling window; if tumbling is the
// intent, timeWindow(Time.seconds(3)) expresses it directly — confirm.
val adCountResultStream = adLogStream
.keyBy(_.userId)
.timeWindow(Time.seconds(3),Time.seconds(3))
.aggregate(new AdCountAgg(), new AdCountWindowResult())
adCountResultStream.print()
3. 处理kafka的数据并写入MySQL
3.1 通过自定义sink直接写入MySQL数据库
// Attach the custom MySQL sink defined below.
// (The original referenced an undefined value `sink`; instantiate the class.)
adCountResultStream.addSink(new mysqlsink())
// Sink that upserts per-window ad-click counts into MySQL:
// try UPDATE first; if no row matched, INSERT a new one.
class mysqlsink extends RichSinkFunction[AdClickCountByProvince]{
  var conn: Connection = _
  var insertStmt: PreparedStatement = _
  var updateStmt: PreparedStatement = _

  // Open one JDBC connection per task and pre-compile both statements.
  override def open(parameters: Configuration): Unit = {
    conn = DriverManager.getConnection("jdbc:mysql://master:3306/test", "root", "123456")
    // `order` is a reserved word in MySQL, so the table name must be
    // backquoted (the original statements would fail to prepare), and the
    // standard keyword is VALUES, not VALUE.
    insertStmt = conn.prepareStatement("insert into `order` (id, sum) values (?, ?)")
    updateStmt = conn.prepareStatement("update `order` set sum = ? where id = ?")
  }

  // Upsert one result. Placeholder order must match each statement:
  // the original bound the UPDATE parameters swapped (windowEnd into sum,
  // count into id), corrupting both columns.
  override def invoke(value: AdClickCountByProvince, context: SinkFunction.Context[_]): Unit = {
    updateStmt.setLong(1, value.count)        // sum = count
    updateStmt.setString(2, value.windowEnd)  // where id = windowEnd
    updateStmt.execute()
    if (updateStmt.getUpdateCount == 0) {
      // No existing row for this window — insert (id, sum).
      insertStmt.setString(1, value.windowEnd)
      insertStmt.setLong(2, value.count)
      insertStmt.execute()
    }
  }

  // Release JDBC resources; null-guards avoid an NPE if open() failed early.
  override def close(): Unit = {
    if (insertStmt != null) insertStmt.close()
    if (updateStmt != null) updateStmt.close()
    if (conn != null) conn.close()
  }
}