文章目录
idea新建maven项目,配置scala环境
File -> Project Structure -> Modules,添加 Scala 依赖库
配置pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the FlinkDemo project: Flink 1.14.4 compiled against Scala 2.12. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.yxh.hjt.hw</groupId>
<artifactId>FlinkDemo</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Flink DataStream (streaming) API for Scala.
https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.12</artifactId>
<version>1.14.4</version>
</dependency>
<!-- Flink DataSet (batch) API for Scala.
https://mvnrepository.com/artifact/org.apache.flink/flink-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.12</artifactId>
<version>1.14.4</version>
</dependency>
<!-- Required to run jobs locally from the IDE; without it execution fails with
"No ExecutorFactory found to execute the application."
https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.12</artifactId>
<version>1.14.4</version>
</dependency>
<!-- SLF4J binding so Flink's log output is visible instead of the NOP logger.
https://mvnrepository.com/artifact/org.slf4j/slf4j-simple -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
<!--<scope>test</scope>-->
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiles the Scala sources; plain maven-compiler-plugin only handles Java. -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.4.6</version>
<executions>
<execution> <!-- bind the Scala compile goal to Maven's compile phase -->
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Builds a fat jar (all dependencies included) during `mvn package`,
suitable for submission to a Flink cluster. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Flink 批处理
import org.apache.flink.api.scala._
object WordCount {
  /**
   * Batch word count using the Flink DataSet API.
   *
   * Reads a text file, splits each line on spaces, and prints/writes the
   * per-word counts.
   *
   * @param args optional; args(0) overrides the default input file path
   */
  def main(args: Array[String]): Unit = {
    // Create the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Input: take the path from the command line if given, otherwise fall
    // back to the hard-coded sample file (original default preserved).
    val inputPath: String =
      if (args.nonEmpty) args(0)
      else "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\hello.txt"
    val inputDs: DataSet[String] = env.readTextFile(inputPath)

    // Transformation: split into words, drop empty tokens (consecutive
    // spaces produce "" from split — filtering keeps this consistent with
    // the streaming job), then count occurrences per word.
    val wordCountDs: AggregateDataSet[(String, Int)] = inputDs
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0)
      .sum(1)

    // Output sinks: print() executes eagerly; writeAsText is run by execute().
    wordCountDs.print()
    wordCountDs.writeAsText("flink-ret")
    env.execute("text WordCount")
  }
}
运行时如果出现的报错:
[main] INFO org.apache.flink.api.java.utils.PlanGenerator - The job has 0 registered types and 0 default Kryo serializers
Exception in thread "main" java.lang.IllegalStateException: No ExecutorFactory found to execute the application.
at org.apache.flink.core.execution.DefaultExecutorServiceLoader.getExecutorFactory(DefaultExecutorServiceLoader.java:88)
at org.apache.flink.api.java.ExecutionEnvironment.executeAsync(ExecutionEnvironment.java:1043)
at org.apache.flink.api.java.ExecutionEnvironment.execute(ExecutionEnvironment.java:958)
at org.apache.flink.api.java.ExecutionEnvironment.execute(ExecutionEnvironment.java:942)
at org.apache.flink.api.java.DataSet.collect(DataSet.java:417)
at org.apache.flink.api.java.DataSet.print(DataSet.java:1748)
at org.apache.flink.api.scala.DataSet.print(DataSet.scala:1864)
at WordCount$.main(WordCount.scala:20)
at WordCount.main(WordCount.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:78)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:567)
at com.intellij.rt.execution.application.AppMainV2.main(AppMainV2.java:131)
则需要在pom.xml中添加依赖:
<!--https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.12</artifactId>
<version>1.14.4</version>
</dependency>
如果报错
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
则需要在pom.xml中添加slf4j-simple依赖
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-simple -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
<!--<scope>test</scope>-->
</dependency>
Flink流处理
scala代码
import org.apache.flink.streaming.api.scala._
object WordCountFlow {
def main(args: Array[String]): Unit = {
// 创建流处理的执行环境
val env = StreamExecutionEnvironment.getExecutionEnvironment
// env.setParallelism(8)
// env.disableOperatorChaining()
// 从外部命令中提取参数,作为socket主机名和端口号
val host: String = "192.168.0.47"
val port: Int = 8888
// 接收一个socket文本流
val inputDataStream: DataStream[String] = env.socketTextStream(host, port)
// 进行转化处理统计
val resultDataStream: DataStream[(String, Int)] = inputDataStream
.flatMap(_.split(" "))
.filter(_.nonEmpty)
.map((_, 1))
.keyBy(0)
.sum(1)
resultDataStream.print().setParallelism(1)
// 启动任务执行
env.execute("stream word count")
}
}
在Linux上可以使用nc -lk 8888模拟一个Socket服务。
可能遇到的问题
- module java.base does not “opens java.lang” to unnamed module @482f8f11
Exception in thread "main" java.lang.reflect.InaccessibleObjectException: Unable to make field private final byte[] java.lang.String.value accessible: module java.base does not "opens java.lang" to unnamed module @482f8f11
at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:357)
at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:297)
at java.base/java.lang.reflect.Field.checkCanSetAccessible(Field.java:177)
at java.base/java.lang.reflect.Field.setAccessible(Field.java:171)
at org.apache.flink.api.java.ClosureCleaner.clean(ClosureCleaner.java:104)
at org.apache.flink.api.java.ClosureCleaner.clean(ClosureCleaner.java:126)
at org.apache.flink.api.java.ClosureCleaner.clean(ClosureCleaner.java:71)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.clean(StreamExecutionEnvironment.java:1821)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.addSource(StreamExecutionEnvironment.java:1584)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.addSource(StreamExecutionEnvironment.java:1529)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.socketTextStream(StreamExecutionEnvironment.java:1333)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.socketTextStream(StreamExecutionEnvironment.java:1373)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.socketTextStream(StreamExecutionEnvironment.java:1390)
at org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.socketTextStream(StreamExecutionEnvironment.scala:608)
at WordCountFlow$.main(WordCountFlow.scala:16)
at WordCountFlow.main(WordCountFlow.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:78)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:567)
at com.intellij.rt.execution.application.AppMainV2.main(AppMainV2.java:131)
解决方法:VM options中添加:--add-opens java.base/java.lang=ALL-UNNAMED(注意是两个短横线,不是长破折号)
- 问题二:cannot access class sun.net.util.IPAddressUtil (in module java.base) because module java.base does not export sun.net.util to unnamed module @0x4501b7af
Exception in thread "main" java.lang.IllegalAccessError: class org.apache.flink.util.NetUtils (in unnamed module @0x4501b7af) cannot access class sun.net.util.IPAddressUtil (in module java.base) because module java.base does not export sun.net.util to unnamed module @0x4501b7af
at org.apache.flink.util.NetUtils.unresolvedHostToNormalizedString(NetUtils.java:167)
at org.apache.flink.util.NetUtils.unresolvedHostAndPortToNormalizedString(NetUtils.java:195)
at org.apache.flink.runtime.clusterframework.BootstrapTools.startActorSystem(BootstrapTools.java:254)
at org.apache.flink.runtime.clusterframework.BootstrapTools.startActorSystem(BootstrapTools.java:164)
at org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils.createRpcService(AkkaRpcServiceUtils.java:126)
at org.apache.flink.runtime.metrics.util.MetricUtils.startMetricsRpcService(MetricUtils.java:139)
at org.apache.flink.runtime.minicluster.MiniCluster.start(MiniCluster.java:286)
at org.apache.flink.client.program.PerJobMiniClusterFactory.submitJob(PerJobMiniClusterFactory.java:79)
at org.apache.flink.client.deployment.executors.LocalExecutor.execute(LocalExecutor.java:81)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1733)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1634)
at org.apache.flink.streaming.api.environment.LocalStreamEnvironment.execute(LocalStreamEnvironment.java:74)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
at org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.scala:678)
at WordCountFlow$.main(WordCountFlow.scala:28)
at WordCountFlow.main(WordCountFlow.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:78)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:567)
at com.intellij.rt.execution.application.AppMainV2.main(AppMainV2.java:131)
解决方法:VM options中添加参数:
--add-opens java.base/sun.net.util=ALL-UNNAMED
提交作业
打成jar包后,将jar上传到flink,可以通过命令行或者web页面的方式提交作业。
命令行模式提交作业:
flink run -c 入口类 -p 并行度 jar包的路径 程序中的外部参数(eg. --host=localhost)
[root@k8s-node3 myLearning]# flink run -c com.hjt.yxh.hw.WordCountFlow -p 2 /home/software/flink-1.14.4/examples/myLearning/FlinkDemo-1.0-SNAPSHOT.jar
- 取消作业:flink cancel jobid
- flink list -a 查看所有的作业