It took digging through countless references and falling into countless pitfalls to get this working!!!
Overall idea: run the Spring Boot application itself as a job inside the Flink cluster and keep that job running indefinitely. Whenever another task needs to run, just call one of the Spring Boot endpoints; each endpoint can be treated as its own task, and when it is called Flink dynamically creates and executes a job against the current execution environment.
Note: run the packaged jar with the ./flink run command and keep it running in the background.
1. Add the following dependencies to your pom.xml
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <java.version>1.8</java.version>
    <flink.version>1.13.0</flink.version>
    <scala.binary.version>2.12</scala.binary.version>
    <slf4j.version>1.7.30</slf4j.version>
    <mysql.version>5.1.47</mysql.version>
    <!-- keep this in sync with the spring-boot-starter-parent version below -->
    <spring.boot.version>2.1.1.RELEASE</spring.boot.version>
</properties>
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.1.1.RELEASE</version>
    <relativePath/> <!-- lookup parent from repository -->
</parent>
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- exclude Spring Boot's default logging stack so it does not clash with Flink's slf4j/log4j setup -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-logging</artifactId>
        <exclusions>
            <exclusion>
                <groupId>*</groupId>
                <artifactId>*</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
        <version>${spring.boot.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.8</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>${mysql.version}</version>
    </dependency>
</dependencies>
<build>
    <finalName>flink</finalName>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>3.2.4</version>
            <dependencies>
                <!-- provides the PropertiesMergingResourceTransformer referenced below -->
                <dependency>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                    <version>${spring.boot.version}</version>
                </dependency>
            </dependencies>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <createDependencyReducedPom>false</createDependencyReducedPom>
                        <artifactSet>
                            <excludes>
                                <exclude>com.google.code.findbugs:jsr305</exclude>
                                <exclude>org.slf4j:*</exclude>
                                <exclude>log4j:*</exclude>
                            </excludes>
                        </artifactSet>
                        <filters>
                            <filter>
                                <!-- strip signature files, otherwise the shaded jar may be rejected -->
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>module-info.class</exclude>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <transformers>
                            <!-- merge Spring and Flink/Akka metadata files instead of letting them overwrite each other -->
                            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/spring.handlers</resource>
                            </transformer>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>reference.conf</resource>
                            </transformer>
                            <transformer implementation="org.springframework.boot.maven.PropertiesMergingResourceTransformer">
                                <resource>META-INF/spring.factories</resource>
                            </transformer>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/spring.schemas</resource>
                            </transformer>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <mainClass>cn.sdata.FlinkBootApplication</mainClass>
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
2. application.yml configuration
server:
  port: 10001
3. Spring Boot startup class
package cn.sdata;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class FlinkBootApplication {

    public static void main(String[] args) throws InterruptedException {
        SpringApplication.run(FlinkBootApplication.class, args);
        // Keep main() alive forever so the "job" hosting the Spring Boot application never terminates
        while (true) {
            Thread.sleep(30000);
        }
    }
}
4. Controller-layer test code (one streaming job and one bounded, batch-style job)
package cn.sdata.controller; // any package under cn.sdata works, so component scanning picks this class up

import lombok.AllArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

@RequestMapping("test")
@RestController
@AllArgsConstructor
public class TestController {

    @GetMapping("test1")
    public void test(@RequestParam(defaultValue = "1") Integer parallelism) throws Exception {
        // Create the execution environment (inside the cluster this resolves to the cluster's context environment)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Set the parallelism passed in as a request parameter
        env.setParallelism(parallelism);
        // Read a text stream from the socket opened with nc (step 5)
        DataStreamSource<String> source = env.socketTextStream("192.168.1.200", 8888);
        // Split each line into words, emit (word, 1) pairs, key by word and sum the counts
        SingleOutputStreamOperator<Tuple2<String, Integer>> operator = source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                String[] splits = s.split(" ");
                for (String split : splits) {
                    collector.collect(new Tuple2<>(split, 1));
                }
            }
        }).filter(data -> StringUtils.isNotEmpty(data.f0)).keyBy(data -> data.f0).sum(1);
        // Print the running word counts
        operator.print();
        // Submit the job
        env.execute();
    }

    @GetMapping("test2")
    public void test2() throws Exception {
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Set the parallelism
        env.setParallelism(1);
        // Use a fixed list of elements as a bounded source, so the job finishes like a batch job
        DataStreamSource<Integer> integerDataStream = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
        // Print to the console
        integerDataStream.print("int");
        // Submit the job
        env.execute();
    }
}
5. Open an nc (netcat) listening port on your server (this is the socket that test1 reads from), as shown below.
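Assuming netcat is available on the host that test1 connects to (192.168.1.200 in the controller code), a listener on port 8888 can be opened with:
nc -lk 8888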
6. Package the project and upload the jar to the server where your JobManager runs.
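A minimal sketch of this step; the user and target directory /opt/jobs are placeholders, and the jar is named flink.jar because of <finalName>flink</finalName> in the pom:
mvn clean package
scp target/flink.jar user@<jobmanager-host>:/opt/jobs/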
7. Change into the bin directory of your Flink installation.
8. Run the jar in the background, for example:
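A minimal sketch, assuming Flink is installed under /opt/flink-1.13.0 and the jar was uploaded to /opt/jobs (both paths are placeholders); nohup plus & keeps the ./flink run command, and with it the Spring Boot application, running in the background:
cd /opt/flink-1.13.0/bin
nohup ./flink run /opt/jobs/flink.jar > flink-boot.log 2>&1 &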
9. Call the streaming endpoint through the REST interface with the parallelism set to 4.
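For example with curl, where <jobmanager-host> is a placeholder for the server the jar was started on and 10001 is the port configured in application.yml:
curl "http://<jobmanager-host>:10001/test/test1?parallelism=4"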
In the Flink web UI you can see that the streaming job occupies 4 task slots.
10. Call the batch-style endpoint through the REST interface.
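Again with curl (same placeholder host as above):
curl "http://<jobmanager-host>:10001/test/test2"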
You can see that this job occupies 1 task slot.
Wait a moment and refresh the page again.
You will find that the job has finished and the slot is no longer occupied.
Summary: this achieves a tight integration of Spring Boot and Flink in which Flink jobs are created simply by calling REST endpoints. When Spring Boot has to handle business scenarios with large data volumes and heavy computation, the processing and persistence of that data can be handed off to Flink, and the Flink cluster can be scaled out by adding nodes as the workload grows.