Kafka input

The job below consumes JSON `Student` records from a Kafka topic, assigns event-time timestamps and periodic watermarks from the `rideTime` field, prints the bounds of 30-second tumbling event-time windows, and forwards each record to a custom Kafka sink.
package com.myflink.data.sinks;
import com.myflink.data.sinks.sinks.SinkToKafka02;
import com.zhisheng.common.utils.GsonUtil;
import com.myflink.data.sinks.model.Student;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import javax.annotation.Nullable;
import java.text.SimpleDateFormat;
import java.util.Properties;
public class WatermarkTest {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "myGroup");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("auto.offset.reset", "latest");
DataStream<String> stream =
env.addSource(new FlinkKafkaConsumer011<>("ecs-pre-alarm-monitor", new SimpleStringSchema(), properties));
DataStream<Tuple5<Integer, String, String, Integer, Long>> inputMap = stream.map(new MapFunction<String, Tuple5<Integer, String, String, Integer, Long>>() {
private static final long serialVersionUID = -8812094804806854937L;
@Override
public Tuple5<Integer, String, String, Integer, Long> map(String value) throws Exception {
System.out.println("收到的数值 value:"+value);
Student student = GsonUtil.fromJson(value, Student.class);
return new Tuple5<>(student.getId(), student.getName(), student.getPassword(), student.getAge(), student.getRideTime());
}
});
DataStream<Tuple5<Integer, String, String, Integer, Long>> watermark =
inputMap.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple5<Integer, String, String, Integer, Long>>() {
private static final long serialVersionUID = 8252616297345284790L;
Long currentMaxTimestamp = 0L;
Long maxOutOfOrderness = 1000L; // maximum allowed out-of-orderness is 1 second
Watermark watermark = null;
@Nullable
@Override
public Watermark getCurrentWatermark() {
watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
return watermark;
}
@Override
public long extractTimestamp(Tuple5<Integer, String, String, Integer, Long> element, long previousElementTimestamp) {
Long timestamp = element.f4;
currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
return timestamp;
}
});
watermark.keyBy(4).window(TumblingEventTimeWindows.of(Time.seconds(30)))
.apply(new WindowFunction<Tuple5<Integer, String, String, Integer, Long>, String, Tuple, TimeWindow>() {
private static final long serialVersionUID = 7813420265419629362L;
@Override
public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple5<Integer, String, String, Integer, Long>> input, Collector<String> out) throws Exception {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
out.collect("window " + format.format(window.getStart()) + " window " + format.format(window.getEnd()));
}
}).print();
SinkToKafka02 sinkToKafka02 = new SinkToKafka02();
watermark.addSink(sinkToKafka02);
env.execute("window test");
}
}
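The `Student` model imported above is not shown in the post. Here is a minimal sketch of what it could look like, reconstructed purely from the getters the job calls; the field types are assumptions inferred from the `Tuple5<Integer, String, String, Integer, Long>` signature:

```java
package com.myflink.data.sinks.model;

// Hypothetical POJO reconstructed from the getters used in WatermarkTest;
// field types are inferred from Tuple5<Integer, String, String, Integer, Long>.
public class Student {
    private Integer id;
    private String name;
    private String password;
    private Integer age;
    private Long rideTime; // event-time timestamp in epoch milliseconds, read by the watermark assigner

    public Integer getId() { return id; }
    public String getName() { return name; }
    public String getPassword() { return password; }
    public Integer getAge() { return age; }
    public Long getRideTime() { return rideTime; }
}
```

Gson can populate these private fields directly from JSON, so no setters or explicit constructor are strictly required.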
Kafka output

The custom sink below writes each tuple back to a Kafka topic.
package com.myflink.data.sinks.sinks;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
public class SinkToKafka02 extends RichSinkFunction<Tuple5<Integer, String, String, Integer, Long>> {

    private transient KafkaProducer<String, String> producer;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Create the producer once per task and reuse it for every record.
        producer = new KafkaProducer<>(props);
    }

    /**
     * invoke() is called once for every record.
     */
    @Override
    public void invoke(Tuple5<Integer, String, String, Integer, Long> value, Context context) throws Exception {
        System.out.println("Sink received value: " + value);
        // Send the tuple's string form; no record key is needed here.
        producer.send(new ProducerRecord<>("myTopic", String.valueOf(value)));
    }

    @Override
    public void close() throws Exception {
        if (producer != null) {
            producer.close();
        }
        super.close();
    }
}
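For comparison, the 0.11 connector used above already ships a producer sink, so a hand-rolled `RichSinkFunction` is not strictly necessary. A minimal sketch, assuming the same broker and topic as the custom sink:

```java
// Inside WatermarkTest#main, in place of the SinkToKafka02 lines.
// Requires: import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
watermark
        .map(Tuple5::toString) // serialize each tuple as its toString() form
        .addSink(new FlinkKafkaProducer011<>("localhost:9092", "myTopic", new SimpleStringSchema()));
```

The connector sink reuses one producer per task and can flush pending records on checkpoints, which the hand-written sink does not do.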
POM dependencies
<properties>
<compiler.version>1.8</compiler.version>
<flink.version>1.6.2</flink.version>
<java.version>1.8</java.version>
<scala.binary.version>2.11</scala.binary.version>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- The following two dependencies are needed for Flink's logs to show up -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
</dependencies>
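To give the job some input, a small standalone producer can publish `Student` JSON to the source topic. A sketch, assuming the same broker and the `ecs-pre-alarm-monitor` topic the consumer reads; the class name and the hand-built JSON are illustrative, not from the original post:

```java
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;

public class TestDataProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 1; i <= 10; i++) {
                // rideTime carries the event-time timestamp that the watermark assigner extracts.
                String json = String.format(
                        "{\"id\":%d,\"name\":\"student%d\",\"password\":\"pwd%d\",\"age\":%d,\"rideTime\":%d}",
                        i, i, i, 18 + i, System.currentTimeMillis());
                producer.send(new ProducerRecord<>("ecs-pre-alarm-monitor", json));
            }
            producer.flush(); // redundant before close, but makes the intent explicit
        }
    }
}
```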