A note before we start: this setup embeds Flink inside a Spring application rather than submitting jobs to a standalone Flink cluster. I'm not sure why the company wanted it this way; probably budget.
Adding the Maven dependencies
Only the Flink-related artifacts are listed here; web and other dependencies are omitted.
<properties>
    <flink.version>1.10.0</flink.version>
    <!-- Referenced below but missing from the original post; 2.11 matches the suffixed artifacts -->
    <scala.binary.version>2.11</scala.binary.version>
</properties>
<dependencies>
    <!-- Flink -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table</artifactId>
        <version>${flink.version}</version>
        <type>pom</type>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Kafka connector; the bundled kafka-clients is excluded so the
         broker-matching 0.10.2.1 client can be pinned explicitly below -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.10_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.apache.kafka</groupId>
                <artifactId>kafka-clients</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.10.2.1</version>
    </dependency>
    <!-- Redis sink; 1.1.5 is the last release under org.apache.flink
         (the connector later moved to Apache Bahir) -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-redis_2.10</artifactId>
        <version>1.1.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-json</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-jdbc_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
Using Flink
Processing the data in Kafka
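The job below reads records into an AiStatisticsBO payload object whose definition the original post never shows. A minimal sketch, assuming Lombok @Data and only the fields the job actually touches (names and types are inferred from the getter calls; the real class presumably has more):

import lombok.Data;

@Data
public class AiStatisticsBO {
    private String callInterface;   // which interface was called
    private String caller;          // who called it
    private Boolean callIsSuc;      // whether the call succeeded
    private String callPlatform;    // which platform the call came from
    private Integer callTime;       // call duration, assumed to be milliseconds
    private String statisticTime;   // minute bucket, set by the job itself
}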
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.*;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.*;
import org.apache.flink.util.Collector;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.core.annotation.Order;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Component;
import java.util.*;

@Component
@Order(value = 1)
@Slf4j
public class FlinkClass implements CommandLineRunner {

    // Kafka settings (addresses are masked in the original post)
    private static final String BOOTSTRAP_SERVERS = "10.**,***.118:9990";
    private static final String GROUP_ID = "0";
    private static final String topic = "xxx";

    // Interface names used to pre-seed the per-interface counters in
    // CountAggregate#createAccumulator. The original post never declares this
    // field; the values below are placeholders.
    private static final String[] techArr = {"interfaceA", "interfaceB"};

    // Injected in the original post but unused in this snippet
    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    @Override
    public void run(String... args) throws Exception {
        System.err.println("------ Flink job ------");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism of the embedded job
        env.setParallelism(1);
        /*
         * Choose the time characteristic:
         * ProcessingTime uses the machine clock of the operator doing the processing.
         * IngestionTime uses the moment a record enters the Flink streaming data flow.
         * EventTime uses a timestamp carried by the record itself; the application
         * must specify how to extract it.
         */
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
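        // If EventTime were needed instead, a timestamp/watermark extractor would
        // have to be wired onto the source stream. A sketch -- the millisecond "ts"
        // field in the Kafka JSON is an assumption, not from the original post:
        // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // DataStream<String> withTs = stream.assignTimestampsAndWatermarks(
        //         new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(5)) {
        //             @Override
        //             public long extractTimestamp(String element) {
        //                 return JSON.parseObject(element).getLongValue("ts");
        //             }
        //         });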
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", BOOTSTRAP_SERVERS);
        properties.setProperty("group.id", GROUP_ID);
        FlinkKafkaConsumer010<String> consumer = new FlinkKafkaConsumer010<>(topic, new SimpleStringSchema(), properties);
        consumer.setStartFromLatest();
        DataStream<String> stream = env.addSource(consumer);
        // First pass: normalize each record into a fixed tuple shape
        DataStream<Tuple5<String, String, Boolean, String, AiStatisticsBO>> ds = stream.flatMap(new FlinkClass.LineSplitter());
        // Tuple5 <interface, caller, success flag, platform, bo (source record)>
        // Aggregate every 60s. keyBy(index) groups on the tuple field at that index;
        // since CountAggregate and WindowResult are supplied, the key choice barely matters here.
        DataStream<Tuple2<Long, JSONObject>> wcount = ds.keyBy(0)
                .timeWindow(Time.seconds(60))
                // CountAggregate: the AggregateFunction -- all the counting logic lives there.
                // WindowResult: turns the aggregate into the emitted record when the window fires.
                .aggregate(new FlinkClass.CountAggregate(), new FlinkClass.WindowResult());
        // Redis sink (host masked in the original post)
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("10.***,*.35").setPort(6380).setDatabase(0).build();
        wcount.addSink(new RedisSink<>(conf, new FlinkClass.RedisExampleMapper()));
        wcount.print();
        // Job names must be unique. Note that execute() blocks the calling thread (see the summary).
        env.execute("TECH");
    }
    /**
     * Stream pre-processing: normalize each raw record into
     * Tuple5 <interface, caller, success flag, platform, bo (source record)>
     */
    private static final class LineSplitter implements FlatMapFunction<String, Tuple5<String, String, Boolean, String, AiStatisticsBO>> {
        @Override
        public void flatMap(String value, Collector<Tuple5<String, String, Boolean, String, AiStatisticsBO>> out) {
            try {
                // Truncate the current time down to the minute
                long time = System.currentTimeMillis() - System.currentTimeMillis() % (1000 * 60);
                JSONObject json = JSON.parseObject(value);
                AiStatisticsBO bo = JSON.toJavaObject(json, AiStatisticsBO.class);
                bo.setStatisticTime(String.valueOf(time));
                out.collect(new Tuple5<>(bo.getCallInterface(), bo.getCaller(), bo.getCallIsSuc(), bo.getCallPlatform(), bo));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    /**
     * Flink aggregate function.
     * @author: xzy
     * @date: 2020/12/24
     * The three type parameters are IN, ACC and OUT: the input element,
     * the accumulator, and the result.
     */
    private static final class CountAggregate implements AggregateFunction<Tuple5<String, String, Boolean, String, AiStatisticsBO>,
            Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList>, Tuple1<JSONObject>> {
        /**
         * 1. Called only once per window.
         * Creates the accumulator the later callbacks work on:
         * total calls this minute, successful calls this minute, total elapsed
         * time this minute, per-interface counts, per-caller counts, raw records.
         */
        @Override
        public Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> createAccumulator() {
            // Per-interface call counts, pre-seeded with every known interface
            LinkedHashMap<String, Integer> map = new LinkedHashMap<>();
            for (String tech : techArr) {
                map.put(tech, 0);
            }
            // Per-caller call counts
            LinkedHashMap<String, Integer> callerMap = new LinkedHashMap<>();
            ArrayList list = new ArrayList();
            return new Tuple6<>(0, 0, 0, map, callerMap, list);
        }
        /**
         * 2. Called for every element added to the window.
         * @param value the incoming element
         * @param accumulator the running accumulator
         */
        @Override
        public Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> add
                (Tuple5<String, String, Boolean, String, AiStatisticsBO> value,
                 Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> accumulator) {
            // Total calls this minute
            accumulator.f0++;
            if (value.f4.getCallIsSuc()) {
                // Successful calls this minute
                accumulator.f1++;
            }
            // Total elapsed time
            accumulator.f2 += value.f4.getCallTime();
            // Per-interface call counts
            LinkedHashMap<String, Integer> map = accumulator.f3;
            map.merge(value.f4.getCallInterface(), 1, Integer::sum);
            accumulator.f3 = map;
            // Per-caller call counts
            LinkedHashMap<String, Integer> callerMap = accumulator.f4;
            callerMap.merge(value.f4.getCaller(), 1, Integer::sum);
            accumulator.f4 = callerMap;
            accumulator.f5.add(value.f4);
            return new Tuple6<>(accumulator.f0, accumulator.f1, accumulator.f2, accumulator.f3, accumulator.f4, accumulator.f5);
        }
        /**
         * 3. Called once per minute, when the window fires, to produce the result.
         * @param accumulator the accumulator to summarize
         */
        @Override
        public Tuple1<JSONObject> getResult(Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> accumulator) {
            JSONObject json = new JSONObject();
            json.put("currCallCount", accumulator.f0);
            json.put("currCallSucCount", accumulator.f1);
            json.put("currCallTime", accumulator.f2);
            json.put("eachInterface", accumulator.f3);
            json.put("eachCaller", accumulator.f4);
            json.put("allInterfaceList", accumulator.f5);
            return new Tuple1<>(json);
        }
        /**
         * Merges two accumulators. This is only relevant for session windows
         * (merging window panes), so with the tumbling window above it is
         * normally never invoked.
         */
        @Override
        public Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> merge(
                Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> a,
                Tuple6<Integer, Integer, Integer, LinkedHashMap<String, Integer>, LinkedHashMap<String, Integer>, ArrayList> b) {
            a.f3.forEach((k, v) -> b.f3.merge(k, v, Integer::sum));
            a.f4.forEach((k, v) -> b.f4.merge(k, v, Integer::sum));
            a.f5.addAll(b.f5);
            return new Tuple6<>(a.f0 + b.f0, a.f1 + b.f1, a.f2 + b.f2, b.f3, b.f4, a.f5);
        }
    }
    public static final class WindowResult implements WindowFunction<Tuple1<JSONObject>, Tuple2<Long, JSONObject>, Tuple, TimeWindow> {
        @Override
        public void apply(Tuple key, TimeWindow window, Iterable<Tuple1<JSONObject>> input, Collector<Tuple2<Long, JSONObject>> out) throws Exception {
            // Emit <window start timestamp, aggregated JSON> downstream
            long windowStart = window.getStart();
            JSONObject json = input.iterator().next().f0;
            out.collect(Tuple2.of(windowStart, json));
        }
    }
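    /**
     * RedisExampleMapper is referenced in run() but its definition is missing
     * from the original post. A minimal sketch under assumptions: each window
     * result is written with a plain SET, keyed by the window start timestamp
     * (the "stat:" prefix is made up; adjust the command and key format to
     * your actual schema).
     */
    private static final class RedisExampleMapper implements RedisMapper<Tuple2<Long, JSONObject>> {
        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.SET);
        }

        @Override
        public String getKeyFromData(Tuple2<Long, JSONObject> data) {
            return "stat:" + data.f0;
        }

        @Override
        public String getValueFromData(Tuple2<Long, JSONObject> data) {
            return data.f1.toJSONString();
        }
    }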
}
Summary
A few open problems:
- Multiple embedded Flink jobs can't run this way. The job starts with the application, but env.execute() blocks the CommandLineRunner thread, so a second Flink job (sequenced via the @Order annotation) never executes. A possible workaround is sketched below.
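One way around the blocking issue, as a sketch (not from the original post): Flink 1.10 introduced StreamExecutionEnvironment#executeAsync (FLIP-74), which submits the job and returns a JobClient instead of blocking; alternatively, the blocking execute() call can be parked on its own thread so the next runner gets to start.

// Option 1: non-blocking submission (executeAsync is available since Flink 1.10)
JobClient jobClient = env.executeAsync("TECH");
log.info("Submitted Flink job {}", jobClient.getJobID());

// Option 2: keep the blocking execute(), but run it on a dedicated thread
// so the next CommandLineRunner can start. Pick one option, not both.
new Thread(() -> {
    try {
        env.execute("TECH");
    } catch (Exception e) {
        log.error("Flink job failed", e);
    }
}, "flink-job-TECH").start();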