Flink-如果某一个并行度的watermark不推进导致整个下游的watermark无法推进,如何处理?

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.common.eventtime;

import org.apache.flink.annotation.Public;

import java.time.Duration;

import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * A WatermarkGenerator for situations where records are out of order, but you can place an upper
 * bound on how far the events are out of order. An out-of-order bound B means that once an event
 * with timestamp T was encountered, no events older than {@code T - B} will follow any more.
 *
 * <p>The watermarks are generated periodically. The delay introduced by this watermark strategy is
 * the periodic interval length, plus the out-of-orderness bound.
 *
 * <p>Local modification note ("secondary development"): this class deliberately reuses the
 * fully-qualified name of the class shipped with Flink. According to the original author's note,
 * when the JVM (parent-delegation class loading) encounters two classes with the same name, the
 * one in the project's own source directory is loaded first, so this copy - which additionally
 * prints every emitted watermark - is the one that runs.
 */
@Public
public class BoundedOutOfOrdernessWatermarks<T> implements WatermarkGenerator<T> {

    /** Largest event timestamp observed so far. */
    private long maxTimestamp;

    /** Upper bound, in milliseconds, on how far events may arrive out of order. */
    private final long outOfOrdernessMillis;

    /**
     * Builds a generator that tolerates the given amount of out-of-orderness.
     *
     * @param maxOutOfOrderness The bound for the out-of-orderness of the event timestamps; must be
     *     non-null and non-negative.
     */
    public BoundedOutOfOrdernessWatermarks(Duration maxOutOfOrderness) {
        checkNotNull(maxOutOfOrderness, "maxOutOfOrderness");
        checkArgument(!maxOutOfOrderness.isNegative(), "maxOutOfOrderness cannot be negative");

        final long boundMillis = maxOutOfOrderness.toMillis();
        this.outOfOrdernessMillis = boundMillis;

        // Seed the maximum so that the first watermark computed in onPeriodicEmit
        // (maxTimestamp - bound - 1) comes out as exactly Long.MIN_VALUE.
        this.maxTimestamp = Long.MIN_VALUE + boundMillis + 1;
    }

    // ------------------------------------------------------------------------

    /** Remembers the event timestamp if it is larger than every timestamp seen before. */
    @Override
    public void onEvent(T event, long eventTimestamp, WatermarkOutput output) {
        if (eventTimestamp > maxTimestamp) {
            maxTimestamp = eventTimestamp;
        }
    }

    /**
     * Emits {@code maxTimestamp - outOfOrdernessMillis - 1} as the current watermark and then
     * prints the emitting thread's name together with that watermark to standard output.
     */
    @Override
    public void onPeriodicEmit(WatermarkOutput output) {
        final long watermarkMillis = maxTimestamp - outOfOrdernessMillis - 1;
        final Watermark watermark = new Watermark(watermarkMillis);
        output.emitWatermark(watermark);

        // Debug aid added for the demo: shows which subtask thread sent which watermark.
        final String threadName = Thread.currentThread().getName();
        System.out.println(threadName + "向下游发送了WM:" + watermark.getTimestamp());
    }
}

 

package com.atguigu.flink.utils;

import com.atguigu.flink.pojo.WaterSensor;

import java.util.ArrayList;
import java.util.List;

/**
 * Small collection helpers shared by the Flink examples.
 *
 * <p>Created by Smexy on 2023/6/20
 */
public class MyUtil
{
    /**
     * Copies every element of {@code iterable} into a new list, preserving iteration order.
     * Used to turn all the records of a window into a {@link List}.
     *
     * <p>The parameter is declared as {@code Iterable<? extends T>} so that a source of a subtype
     * (for example an {@code Iterable<Integer>}) can be collected into a list of a supertype
     * (for example a {@code List<Number>}).
     *
     * @param iterable the elements to copy; must not be {@code null}
     * @param <T> element type of the returned list
     * @return a new, mutable {@link ArrayList} holding the elements in iteration order
     * @throws NullPointerException if {@code iterable} is {@code null}
     */
    public static <T> List<T> parseToList(Iterable<? extends T> iterable){

        List<T> result = new ArrayList<>();

        for (T element : iterable) {
            result.add(element);
        }

        return result;
    }
}

 

package com.atguigu.flink.pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Plain data holder for one water-sensor reading: a sensor id, a timestamp {@code ts} and a
 * value {@code vc}. Accessors and the no-arg / all-args constructors come from the Lombok
 * annotations on the class.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class WaterSensor {
    private String id;
    private Long ts;
    private Integer vc;

    /**
     * Renders the reading in the compact form {@code ws(id,ts,vc)}; unset fields show up as
     * {@code null}.
     */
    @Override
    public String toString() {
        return new StringBuilder("ws(")
            .append(id)
            .append(",")
            .append(ts)
            .append(",")
            .append(vc)
            .append(")")
            .toString();
    }
}

 

package com.atguigu.flink.func;

import com.atguigu.flink.pojo.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;

/**
 * Parses a comma-separated text line of the form {@code id,ts,vc} into a {@link WaterSensor}.
 *
 * <p>Created by Smexy on 2023/4/4
 */
public class WaterSensorMapFunction implements MapFunction<String, WaterSensor>
{
    /**
     * Splits {@code value} on commas and builds a sensor reading from the first three fields.
     *
     * @param value one input line; field 0 is the id, field 1 the timestamp, field 2 the value
     * @return the parsed reading
     * @throws Exception an {@link ArrayIndexOutOfBoundsException} when fewer than three fields
     *     are present, or a {@link NumberFormatException} when field 1 or 2 is not a number
     */
    @Override
    public WaterSensor map(String value) throws Exception {
        final String[] fields = value.split(",");

        final String id = fields[0];
        final Long ts = Long.valueOf(fields[1]);
        final Integer vc = Integer.valueOf(fields[2]);

        return new WaterSensor(id, ts, vc);
    }
}
package com.atguigu.flink.timeAndwindow;

import com.atguigu.flink.func.WaterSensorMapFunction;
import com.atguigu.flink.pojo.WaterSensor;
import com.atguigu.flink.utils.MyUtil;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.time.Duration;

/**
 * Created by Smexy on 2023/6/20
 *
 * <p>Question: if the watermark of one parallel subtask does not advance, the watermark of the
 * whole downstream cannot advance either. How can that be handled?
 *
 * <p>How to look at the watermarks? In the web UI the emitted watermark currently cannot be seen;
 * only the watermark received by the downstream operator is shown.
 *
 * <p>Scenario: there are 6 map tasks, only 1 of them receives data, yet all 6 have to send
 * watermarks. The fix is to control a task's eligibility to send watermarks: when a task has not
 * received data for a long time, so that its watermark cannot be updated or advanced, that task's
 * right to send watermarks is revoked.
 *
 * <p>Idle: unoccupied, not working.
 */
public class Flink13_MultiParilismWaterMark
{
    /**
     * Builds and runs the demo job: socket text source, parse to {@link WaterSensor}, assign
     * timestamps and watermarks, collect 5-second event-time windows and print their contents.
     */
    public static void main(String[] args) {

        // Local environment whose REST endpoint (web UI) is bound to a fixed port.
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 3333);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);

        // Change the default watermark emission interval.
        env.getConfig().setAutoWatermarkInterval(2000);

        // Simulate a job with several parallel subtasks.
        env.setParallelism(6);

        env.socketTextStream("hadoop102", 8888)
            // Global partitioning: every record goes to the first downstream task.
            .global()
            .map(new WaterSensorMapFunction())
            .assignTimestampsAndWatermarks(idleAwareWatermarks())
            // Which window a record lands in has nothing to do with the watermark; it depends
            // only on the record's event time and the window range:
            //   [0, 5000), [5000, 10000), ...
            .windowAll(TumblingEventTimeWindows.of(Time.milliseconds(5000)))
            .process(new ProcessAllWindowFunction<WaterSensor, String, TimeWindow>()
            {
                @Override
                public void process(Context context, Iterable<WaterSensor> elements, Collector<String> out) throws Exception {
                    // Emit all elements of the window as one list-formatted string.
                    out.collect(MyUtil.parseToList(elements).toString());
                }
            })
            .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Declares the watermark strategy used by the job.
     *
     * @return a strategy for monotonously increasing timestamps that reads the event time from
     *     {@code WaterSensor.getTs()} and is configured with an idleness of 10 seconds
     */
    private static WatermarkStrategy<WaterSensor> idleAwareWatermarks() {
        return WatermarkStrategy
            // Part 1: the nature of the watermarks (continuous vs. out of order).
            .<WaterSensor>forMonotonousTimestamps()
            // Part 2: how to extract the event time from a record.
            .withTimestampAssigner((e, ts) -> e.getTs())
            // Part 3: idleness timeout, the remedy for subtasks that never receive data.
            .withIdleness(Duration.ofSeconds(10));
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值