1、环境配置
blink 3.6.0
java8
maven3.8.4
datahub
hologres
2、准备工作
DataHub 依赖:参见《BLINK-DataStream开发》(大数据00的博客,CSDN博客)。
3、pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.gwm</groupId>
<artifactId>blink_ae</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
    <!-- Scala versions; scala.binary.version is used as the artifactId suffix of the Blink streaming dependency. -->
    <scala.version>2.11.12</scala.version>
    <scala.binary.version>2.11</scala.binary.version>
    <!-- Version of the com.alibaba.blink artifacts.
         NOTE(review): the environment section of this document lists "blink 3.6.0" while this is blink-3.3.0; confirm which one is intended. -->
    <blink.version>blink-3.3.0</blink.version>
    <!-- Java level: source and target are both 1.8 and are also written into the jar manifest by the shade plugin. -->
    <java.version>1.8</java.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Not referenced by any active element of this POM. -->
    <sdk.version>0.38.3-public</sdk.version>
</properties>
<dependencies>
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-core</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- <!– <systemPath>${project.basedir}/lib/flink-core-blink-3.2.2.jar</systemPath>–>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- <!– <systemPath>${project.basedir}/lib/flink-streaming-java_2.11-blink-3.2.2.jar</systemPath>–>-->
<!-- </dependency>-->
<!--打包udf自定义函数需要添加此依赖-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-table_${scala.binary.version}</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- <!– <systemPath>${project.basedir}/lib/flink-table_2.11-blink-3.2.2.jar</systemPath>–>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-table-common</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- <!– <systemPath>${project.basedir}/lib/flink-table_2.11-blink-3.2.2.jar</systemPath>–>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/com.alibaba.blink/flink-streaming-scala -->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-java</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-scala_${scala.binary.version}</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/com.alibaba.blink/flink-jdbc -->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.blink</groupId>-->
<!-- <artifactId>flink-jdbc</artifactId>-->
<!-- <version>${blink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- Blink DataStream API. Scope 'provided': needed to compile, not packaged into the job jar. -->
<dependency>
    <groupId>com.alibaba.blink</groupId>
    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
    <version>${blink.version}</version>
    <scope>provided</scope>
</dependency>
<!-- DataHub connector (jar-with-dependencies classifier). It is a SNAPSHOT artifact; see the preparation
     section of this document for how to obtain it. The 'provided' scope is deliberately left disabled,
     so the connector uses the default scope and is bundled into the job jar. -->
<dependency>
    <groupId>com.alibaba.flink</groupId>
    <artifactId>datahub-connector</artifactId>
    <version>0.1-SNAPSHOT</version>
    <classifier>jar-with-dependencies</classifier>
    <!-- <scope>provided</scope> -->
</dependency>
<!-- fastjson, scope 'provided' (not packaged into the job jar).
     NOTE(review): 1.2.34 is an old release; check it against current fastjson security advisories. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.34</version>
    <scope>provided</scope>
</dependency>
<!-- <dependency>-->
<!-- <groupId>com.fasterxml.jackson.core</groupId>-->
<!-- <artifactId>jackson-databind</artifactId>-->
<!-- <version>2.11.2</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.fasterxml.jackson.core</groupId>-->
<!-- <artifactId>jackson-core</artifactId>-->
<!-- <version>2.11.2</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.fasterxml.jackson.core</groupId>-->
<!-- <artifactId>jackson-annotations</artifactId>-->
<!-- <version>2.11.2</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
</dependencies>
<!-- <build>-->
<!-- <plugins>-->
<!-- <plugin>-->
<!-- <groupId>org.apache.maven.plugins</groupId>-->
<!-- <artifactId>maven-shade-plugin</artifactId>-->
<!-- <version>3.4.1</version>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <phase>package</phase>-->
<!-- <goals>-->
<!-- <goal>shade</goal>-->
<!-- </goals>-->
<!-- <configuration>-->
<!-- <minimizeJar>true</minimizeJar>-->
<!-- <filters>-->
<!-- <filter>-->
<!-- <artifact>log4j:log4j</artifact>-->
<!-- <includes>-->
<!-- <include>**</include>-->
<!-- </includes>-->
<!-- </filter>-->
<!-- <filter>-->
<!-- <artifact>commons-logging:commons-logging</artifact>-->
<!-- <includes>-->
<!-- <include>**</include>-->
<!-- </includes>-->
<!-- </filter>-->
<!-- <filter>-->
<!-- <artifact>foo:bar</artifact>-->
<!-- <excludeDefaults>false</excludeDefaults>-->
<!-- <includes>-->
<!-- <include>foo/Bar.class</include>-->
<!-- </includes>-->
<!-- </filter>-->
<!-- </filters>-->
<!-- <relocations combine.self="override">-->
<!-- <relocation>-->
<!-- <pattern>org.glassfish.jersey</pattern>-->
<!-- <shadedPattern>com.alibaba.blink.shaded.datahub.org.glassfish.jersey</shadedPattern>-->
<!-- </relocation>-->
<!-- </relocations>-->
<!-- </configuration>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
<!-- </plugins>-->
<!-- </build>-->
<!-- 添加依赖打包 -->
<!-- <build>-->
<!-- <plugins>-->
<!-- <plugin>-->
<!-- <groupId>org.apache.maven.plugins</groupId>-->
<!-- <artifactId>maven-assembly-plugin</artifactId>-->
<!-- <version>3.1.1</version>-->
<!-- <configuration>-->
<!-- <archive>-->
<!-- <manifest>-->
<!-- <mainClass>com.gwm.driver.AeCollectorData</mainClass>-->
<!-- </manifest>-->
<!-- </archive>-->
<!-- <descriptorRefs>-->
<!-- <descriptorRef>jar-with-dependencies</descriptorRef>-->
<!-- </descriptorRefs>-->
<!-- </configuration>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <id>make-assembly</id>-->
<!-- <phase>package</phase>-->
<!-- <goals>-->
<!-- <goal>single</goal>-->
<!-- </goals>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
<!-- </plugins>-->
<!-- </build>-->
<!-- Builds the job jar with maven-shade-plugin and relocates Jersey to avoid the JAR conflict
     (java.lang.AbstractMethodError) described in the packaging section of this document. -->
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>3.2.0</version>
            <configuration>
                <!-- Do not generate a dependency-reduced POM next to the project POM. -->
                <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
            <executions>
                <execution>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <transformers>
                            <!-- Writes the entries below into the MANIFEST.MF of the shaded jar. -->
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <manifestEntries>
                                    <!-- NOTE(review): the class listed in the code section is named AeCollectorDataCopy;
                                         confirm that com.gwm.driver.AeCollectorData exists in the project. -->
                                    <Main-Class>com.gwm.driver.AeCollectorData</Main-Class>
                                    <X-Compile-Source-JDK>${maven.compiler.source}</X-Compile-Source-JDK>
                                    <X-Compile-Target-JDK>${maven.compiler.target}</X-Compile-Target-JDK>
                                </manifestEntries>
                            </transformer>
                        </transformers>
                        <!-- Moves org.glassfish.jersey classes under the shaded DataHub package prefix. -->
                        <relocations combine.self="override">
                            <relocation>
                                <pattern>org.glassfish.jersey</pattern>
                                <shadedPattern>com.alibaba.blink.shaded.datahub.org.glassfish.jersey</shadedPattern>
                            </relocation>
                        </relocations>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
</project>
4、打包依赖
下面是不会出现 JAR 包冲突的打包方式;如果打包后仍存在冲突,作业运行时会报错。参考:《如何使用JAR作业读取DataHub数据》(实时计算Flink版,阿里云帮助中心)。
常见问题
在作业运行时,如果界面上出现如下类似的错误,表示存在JAR包冲突。
java.lang.AbstractMethodError:com.alibaba.fastjson.support.jaxrs.FastJsonAutoDiscoverable.configure(Lcom/alibaba/blink/shaded/datahub/javax/ws/rs/core/FeatureContext;)
建议您使用maven-shade-plugin插件的Relocation功能,解决JAR包冲突的问题。
<!-- maven-shade-plugin relocation: moves org.glassfish.jersey classes under the shaded DataHub package prefix. -->
<relocations combine.self="override">
    <relocation>
        <pattern>org.glassfish.jersey</pattern>
        <shadedPattern>com.alibaba.blink.shaded.datahub.org.glassfish.jersey</shadedPattern>
    </relocation>
</relocations>
通过以下打包方式解决:
<!-- Shade-plugin build section that packages the job jar and relocates Jersey so the
     java.lang.AbstractMethodError JAR conflict shown above does not occur. -->
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>3.2.0</version>
            <configuration>
                <!-- Do not generate a dependency-reduced POM next to the project POM. -->
                <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
            <executions>
                <execution>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <transformers>
                            <!-- Writes the entries below into the MANIFEST.MF of the shaded jar. -->
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <manifestEntries>
                                    <!-- Entry point of the job jar; must match the fully-qualified name of the driver class. -->
                                    <Main-Class>com.gwm.driver.AeCollectorData</Main-Class>
                                    <X-Compile-Source-JDK>${maven.compiler.source}</X-Compile-Source-JDK>
                                    <X-Compile-Target-JDK>${maven.compiler.target}</X-Compile-Target-JDK>
                                </manifestEntries>
                            </transformer>
                        </transformers>
                        <!-- Moves org.glassfish.jersey classes under the shaded DataHub package prefix. -->
                        <relocations combine.self="override">
                            <relocation>
                                <pattern>org.glassfish.jersey</pattern>
                                <shadedPattern>com.alibaba.blink.shaded.datahub.org.glassfish.jersey</shadedPattern>
                            </relocation>
                        </relocations>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
5、代码
package com.gwm.driver;
import com.alibaba.flink.connectors.datahub.datastream.source.DatahubSourceFunction;
import com.aliyun.datahub.client.model.RecordEntry;
import com.gwm.pojo.Aecollectordata;
import com.gwm.utils.ConfigPropUtils;
import com.gwm.utils.getString;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.util.List;
/**
 * Blink DataStream job that reads batches of {@link RecordEntry} from a DataHub topic,
 * extracts the {@code timestamp} and {@code message} fields of every record and prints
 * the resulting pairs with {@link DataStream#print()}.
 *
 * <p>The connection settings below are placeholders and must be replaced with real values
 * before the job is submitted.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2022/11/14 16:26
 */
public class AeCollectorDataCopy {
    private static String endPoint = "endPoint url";
    // private static String endPoint = "public endpoint"; // Public-network access (if the intranet endpoint is filled in, the public endpoint is not needed).
    private static String projectName = "project_name";
    private static String topicSourceName = "topic";
    // Read from the project's configuration helper; not used by this job's pipeline.
    private static String topicSinkName = ConfigPropUtils.get("datahub_sink_topic");
    private static String accessId = "accessId ";
    private static String accessKey = "accessKey ";
    // Time (epoch milliseconds) of the position at which consumption starts.
    // Original note: for TimeToStampUtil.timeToStamp("2021-12-21"), this time should be at least the current time.
    // private static Long datahubStartInMs = TimeToStampUtil.timeToStamp("2022-09-27");
    private static Long datahubStartInMs = System.currentTimeMillis();
    private static Long datahubEndInMs = Long.MAX_VALUE;

    /**
     * Builds the streaming pipeline (DataHub source, field extraction, print sink) and runs it.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the job cannot be built or its execution fails
     */
    public static void main(String[] args) throws Exception {
        // OdpsLogger logger = new OdpsLogger(
        //         AeCollectorData.class.getName(), OdpsLogger.getDefaultOutputPath(), true, "");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        configure(env);

        DataStreamSource<List<RecordEntry>> recordBatches = env.addSource(newDatahubSource());
        DataStream<Tuple2<String, String>> timestampedMessages =
                recordBatches.flatMap(new TimestampMessageExtractor());

        timestampedMessages.print();
        env.execute();
    }

    /**
     * Applies time characteristic, checkpointing, restart strategy and parallelism to the environment.
     *
     * @param env the execution environment to configure
     */
    private static void configure(StreamExecutionEnvironment env) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Checkpoint every 3,600,000 ms (one hour) in exactly-once mode; keep externalized
        // checkpoints when the job is cancelled.
        env.enableCheckpointing(3600000L);
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConfig.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Fixed-delay restart: up to 3 attempts, 2000 ms apart.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));
        env.setParallelism(8);
    }

    /**
     * Creates the DataHub source from the static connection settings of this class.
     *
     * @return a new source function for the configured project and source topic
     */
    private static DatahubSourceFunction newDatahubSource() {
        // NOTE(review): the meaning of the last three arguments (20L, 1000L, 1000) is defined by
        // the connector's constructor, which is not visible here - confirm against the connector.
        return new DatahubSourceFunction(
                endPoint,
                projectName,
                topicSourceName,
                accessId,
                accessKey,
                datahubStartInMs,
                datahubEndInMs,
                20L,
                1000L,
                1000);
    }

    /**
     * Emits one {@code (timestamp, message)} pair per record of an incoming batch.
     * Field values are obtained through the project helper {@code getString.getString}.
     */
    private static final class TimestampMessageExtractor
            implements FlatMapFunction<List<RecordEntry>, Tuple2<String, String>> {

        @Override
        public void flatMap(List<RecordEntry> value, Collector<Tuple2<String, String>> out) throws Exception {
            for (RecordEntry entry : value) {
                String timestamp = getString.getString(entry, "timestamp");
                String message = getString.getString(entry, "message");
                out.collect(new Tuple2<>(timestamp, message));
            }
        }
    }
}
6、打包上传,启动