以下内容基于 Flink 1.12。
pom依赖
<properties>
<encoding>UTF-8</encoding>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<java.version>1.8</java.version>
<scala.version>2.12</scala.version>
<flink.version>1.12.2</flink.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<!-- 注意:Scala 后缀必须与其他 Flink 依赖一致(_2.12),版本统一使用 ${flink.version},
     否则会因 Scala 二进制不兼容导致运行时错误 -->
<artifactId>flink-connector-kafka_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.7</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.8</version>
</dependency>
<!--flink cdc-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.49</version>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>2.0.0</version>
</dependency>
</dependencies>
代码:
/**
 * Flink streaming word-count demo: consumes comma-separated lines from a Kafka
 * topic, counts tokens two ways (as a custom POJO and as a {@code Tuple2}), and
 * writes both result streams back to Kafka with exactly-once semantics.
 *
 * NOTE(review): the class name shadows Flink's own
 * {@code org.apache.flink.streaming.api.functions.ProcessFunction}; kept for
 * backward compatibility, but consider renaming (e.g. {@code WordCountJob}).
 *
 * @author sanhongbo
 * @date 2022/3/4
 **/
@Slf4j
public class ProcessFunction {

    /** Gson is thread-safe and relatively expensive to build — create it once, not per record. */
    private static final Gson GSON = new GsonBuilder().create();

    public static void main(String[] args) throws Exception {
        // Set up the streaming environment. ProcessingTime is the default in 1.12;
        // the explicit call is kept for readability (the API is deprecated in 1.12).
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        final Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "source");
        // EXACTLY_ONCE sinks use Kafka transactions. Flink's default transaction
        // timeout (1 hour) exceeds the broker default `transaction.max.timeout.ms`
        // (15 minutes), which makes the producer fail on startup — cap it explicitly.
        properties.setProperty("transaction.timeout.ms", String.valueOf(15 * 60 * 1000));

        FlinkKafkaConsumer<String> flinkKafkaConsumer =
                new FlinkKafkaConsumer<>("source_topic", new SimpleStringSchema(), properties);
        // source
        SingleOutputStreamOperator<String> source = env.addSource(flinkKafkaConsumer)
                .name("source")
                .uid("source");
        // Optional event-time setup (uses getWaterMark below); enable when switching
        // the job to event time:
        /*WatermarkStrategy<String> stringWatermarkStrategy = WatermarkStrategy.<String>forMonotonousTimestamps()
                .withTimestampAssigner((str, ts) -> getWaterMark(str));
        source.assignTimestampsAndWatermarks(stringWatermarkStrategy);*/

        // transform
        // 1. word count with a custom POJO instead of a Tuple
        SingleOutputStreamOperator<Count> reduce = process(source);
        // 2. word count directly on Tuple2
        SingleOutputStreamOperator<Tuple2<String, Integer>> tuple = processOfTuple(source);
        tuple.print();

        // sink
        FlinkKafkaProducer<String> stringFlinkKafkaProducer = new FlinkKafkaProducer<>(
                "sink_topic",
                new KafkaSerializationSchemaWrapper<>(
                        "sink_topic", null, false, new SimpleStringSchema()),
                properties,
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
        reduce.map((MapFunction<Count, String>) count -> GSON.toJson(count))
                .addSink(stringFlinkKafkaProducer);
        tuple.map((MapFunction<Tuple2<String, Integer>, String>) Tuple2::toString)
                .returns(TypeInformation.of(String.class))
                .addSink(stringFlinkKafkaProducer);

        // submit the job
        env.execute("flink-test");
    }

    /**
     * Word count using the Tuple2 structure: splits each line on commas, emits
     * {@code (token, 1)} for every non-empty token, and sums counts per token
     * over 5-second processing-time tumbling windows.
     *
     * @param source stream of comma-separated lines
     * @return windowed per-token counts
     */
    public static SingleOutputStreamOperator<Tuple2<String, Integer>> processOfTuple(SingleOutputStreamOperator<String> source) {
        return source
                .flatMap((String line, Collector<Tuple2<String, Integer>> collector) -> {
                    String[] tokens = line.split(",", -1);
                    // emit (word, 1) for each non-empty token
                    for (String token : tokens) {
                        if (token.length() > 0) {
                            collector.collect(new Tuple2<>(token, 1));
                        }
                    }
                })
                // lambdas erase generics — declare the produced type explicitly
                .returns(Types.TUPLE(Types.STRING, Types.INT))
                // index-based keyBy(0) is deprecated since 1.12; use a KeySelector
                .keyBy(value -> value.f0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                .sum(1);
    }

    /**
     * Word count using the {@link Count} POJO: splits each line on commas, emits
     * a {@code Count(token, 1)} per non-empty token, and reduces per key.
     * Note: unlike {@link #processOfTuple}, this variant is unwindowed, so keyed
     * state grows with the number of distinct tokens.
     *
     * @param source stream of comma-separated lines
     * @return continuously updated per-token counts
     */
    public static SingleOutputStreamOperator<Count> process(SingleOutputStreamOperator<String> source) {
        SingleOutputStreamOperator<Count> counts = source
                .flatMap((FlatMapFunction<String, Count>) (line, collector) -> {
                    for (String token : line.split(",", -1)) {
                        // skip empty tokens — keeps results consistent with processOfTuple
                        if (!token.isEmpty()) {
                            collector.collect(new Count(token, 1));
                        }
                    }
                })
                .returns(TypeInformation.of(Count.class));
        return counts.keyBy(Count::getWord)
                .reduce((a, b) -> new Count(a.getWord(), a.getCount() + b.getCount()));
    }

    /**
     * Simple (word, count) pair. A public no-arg constructor is provided so Flink
     * can treat this as a POJO type instead of falling back to Kryo serialization.
     */
    public static class Count implements Serializable {
        String word;
        int count;

        /** Required by Flink's POJO type extraction. */
        public Count() {
        }

        public Count(String word, int count) {
            this.word = word;
            this.count = count;
        }

        public String getWord() {
            return word;
        }

        public void setWord(String word) {
            this.word = word;
        }

        public int getCount() {
            return count;
        }

        public void setCount(int count) {
            this.count = count;
        }

        @Override
        public String toString() {
            return "{" +
                    "word='" + word + '\'' +
                    ", count=" + count +
                    '}';
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            Count count1 = (Count) o;
            return count == count1.count && Objects.equals(word, count1.word);
        }

        @Override
        public int hashCode() {
            return Objects.hash(word, count);
        }
    }

    /** Timestamp format expected in the `eventTime` field of incoming messages. */
    public static final DateTimeFormatter STANDARD_PATTERN_FORMATTER =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Extracts the event time (epoch millis) from a Kafka JSON message.
     * Falls back to {@code System.currentTimeMillis()} when the field is absent
     * or unparseable. Timestamps are interpreted as UTC+8.
     *
     * @param message raw JSON message from Kafka
     * @return event time in epoch milliseconds
     */
    public static long getWaterMark(String message) {
        long eventTime;
        try {
            final JsonObject json = GSON.fromJson(message, JsonObject.class);
            final JsonElement eventTimeElement = json.get("eventTime");
            if (Objects.isNull(eventTimeElement) || eventTimeElement.isJsonNull()) {
                log.warn("input message {} do not contain `eventTime`," +
                        " return `System.currentTimeMillis()` as eventTime", message);
                eventTime = System.currentTimeMillis();
            } else {
                final String eventTimeStr = eventTimeElement.getAsString();
                eventTime = LocalDateTime.parse(eventTimeStr, STANDARD_PATTERN_FORMATTER)
                        .toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
            }
        } catch (Exception exp) {
            log.error("extract `eventTime` from message {} fails," +
                    " return `System.currentTimeMillis()` as eventTime", message, exp);
            eventTime = System.currentTimeMillis();
        }
        return eventTime;
    }
}
附上本地安装kafka步骤(自行测试)
https://blog.csdn.net/sanhongbo/article/details/123574606