改写 Flink Kafka Consumer，实现自定义读取消息及消费控制

最新推荐文章于 2023-10-19 10:44:12 发布

唐予之_

最新推荐文章于 2023-10-19 10:44:12 发布

阅读量1.5k

点赞数

分类专栏： Flink

本文链接：https://blog.csdn.net/lxhandlbb/article/details/110789331

版权

Flink 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

背景：
基于 Flink 1.8，
需要实现暂停消费 Kafka，
并且能在接收到通知后继续消费 Kafka。

需求描述：

实现思路

类1：ProcessUtils，负责处理日期计算、开关配置读取和暂停判断的工具类

package org.apache.flink.streaming.connectors.kafka.internal;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Stateless helpers for the day-switch pause/resume logic of the customised Kafka consumer:
 * processing-date handling, feature switches read from the consumer properties, Base64 decoding
 * of configured SQL, and state persistence through {@code JDBCHelper}.
 *
 * @author hulb
 * @date 2019-05-06 09:13
 */
public class ProcessUtils {

	private final static String TIME_FORMAT_DAY = "yyyyMMdd";
	private final static String FULL_TIME_FORMAT_DAY = "yyyy-MM-dd HH:mm:ss";
	/** Length of the {@code yyyy-MM-dd} prefix of a full timestamp. */
	private final static int DAY_TIME_LENGTH = 10;
	private final static String ENABLE_FLAG = "enable";
	/** Bounds of the nightly window described in {@link #timeFlag()}; currently unused. */
	private final static Integer TIME_FLAG_START = 19;
	private final static Integer TIME_FLAG_END= 2;
	/** Joda-Time formatters are immutable and thread-safe, so a single shared instance is enough. */
	private final static DateTimeFormatter DAY_FORMATTER = DateTimeFormat.forPattern(TIME_FORMAT_DAY);

	/**
	 * Returns today's processing date.
	 *
	 * @return the current date formatted as {@code yyyyMMdd}
	 */
	public static String generateProcessTime() {
		return new DateTime().toString(TIME_FORMAT_DAY);
	}

	/**
	 * Returns the current time, used as a suffix of diagnostic messages.
	 *
	 * @return the current time formatted as {@code yyyy-MM-dd HH:mm:ss}
	 */
	public static String getNowTime() {
		return new DateTime().toString(FULL_TIME_FORMAT_DAY);
	}

	/**
	 * Tells whether the pause triggers should be evaluated right now.
	 *
	 * <p>The check is currently disabled and always answers {@code true}. The previously
	 * sketched rule was "hour of day &gt; TIME_FLAG_START or hour of day &lt; TIME_FLAG_END".
	 *
	 * @return always {@code true}
	 */
	public static boolean timeFlag() {
		return true;
	}


	/**
	 * Start-up check: decides whether the consumer has to be paused immediately because the
	 * day switch has started but has not finished yet.
	 *
	 * <p>Both SQL prefixes are stored Base64-encoded in the properties
	 * ({@code database.checkpoint.sql} and {@code database.query.sql}); the processing date and a
	 * closing quote are appended to each of them before execution.
	 *
	 * @param processTime processing date that is appended to both queries
	 * @param properties  consumer properties holding the SQL prefixes and the JDBC settings
	 * @return {@code true} when the state query counts at least one row and the day-switch
	 *         query counts none
	 */
	public static boolean startCheck(String processTime,Properties properties) {
		// has the Flink job already gone through the pause logic today?
		String checkpointSql = properties.getProperty("database.checkpoint.sql");
		String flinkStateSql = decode(checkpointSql) + processTime + "'";
		Integer flinkStateFlag = JDBCHelper.getAll(JDBCHelper.getStateConn(properties), flinkStateSql);

		// has the day switch already been completed in the Oracle database?
		String querySql = properties.getProperty("database.query.sql");
		String oracleSql = decode(querySql) + processTime + "'";
		Integer oracleFlag = JDBCHelper.getAll(JDBCHelper.getConn(properties), oracleSql);

		return flinkStateFlag >= 1 && oracleFlag == 0;
	}

	/**
	 * Resolves the initial processing date: the configured {@code init.process.time} when it is
	 * set, otherwise today's date.
	 *
	 * @param properties consumer properties
	 * @return the processing date; a configured value is returned as-is, without format validation
	 */
	public static String generateProcessTime(Properties properties) {
		String configured = properties.getProperty("init.process.time");
		if (configured == null || "".equals(configured)) {
			return generateProcessTime();
		}
		return configured;
	}


	/**
	 * Reads the {@code event.pause.enable} switch.
	 *
	 * @param properties consumer properties
	 * @return {@code true} only when the property equals {@code "enable"}
	 */
	public static Boolean pauseEnable(Properties properties) {
		return ENABLE_FLAG.equals(properties.getProperty("event.pause.enable"));
	}

	/**
	 * Reads the {@code save.database.enable} switch.
	 *
	 * @param properties consumer properties
	 * @return {@code true} only when the property equals {@code "enable"}
	 */
	public static Boolean saveStateEnable(Properties properties) {
		return ENABLE_FLAG.equals(properties.getProperty("save.database.enable"));
	}


	/**
	 * Checks whether a record belongs to the day right after the current processing date.
	 *
	 * <p>The first ten characters of {@code dataTime} are taken as the date, the dashes are
	 * removed, and the result is compared with {@code processTime} plus one day.
	 *
	 * @param dataTime    timestamp of the Kafka record; must be longer than ten characters to be considered
	 * @param processTime current processing date in {@code yyyyMMdd} format
	 * @return {@code true} when the record's date is exactly one day after {@code processTime},
	 *         which means consumption has to be paused
	 */
	public static Boolean compareProcessTimeAndDataTime(String dataTime, String processTime) {
		if (dataTime == null || dataTime.length() <= DAY_TIME_LENGTH) {
			return false;
		}
		// keep the date part only and drop the dashes so it matches the yyyyMMdd layout
		String dayTime = dataTime.substring(0, DAY_TIME_LENGTH).replace("-", "");
		if (increaseProcessTime(processTime).equals(dayTime)) {
			System.out.println("比较dataTime 和 processTime -- 此时为true 需要进行暂停" + dataTime+"  -  "+processTime);
			return true;
		}
		return false;
	}


	/**
	 * Adds one day to a processing date.
	 *
	 * <p>The method touches no shared mutable state, so it needs no synchronisation.
	 *
	 * @param oldTime date in {@code yyyyMMdd} format
	 * @return the following day in {@code yyyyMMdd} format
	 */
	public static String increaseProcessTime(String oldTime) {
		DateTime dateTime = DAY_FORMATTER.parseDateTime(oldTime);
		return DAY_FORMATTER.print(dateTime.plusDays(1));
	}

	/**
	 * Decodes a Base64 string.
	 *
	 * @param encodeString Base64 text, may be {@code null}
	 * @return the decoded text interpreted as UTF-8, or an empty string for {@code null} input
	 */
	public static String decode(String encodeString) {
		if (encodeString == null) {
			return "";
		}
		// decode with an explicit charset so the result does not depend on the platform default
		return new String(Base64.getDecoder().decode(encodeString), java.nio.charset.StandardCharsets.UTF_8);
	}

	/**
	 * Prints the state transition to standard error and, when enabled, persists it.
	 *
	 * @param flag            whether the state should be written to the database; {@code null} counts as disabled
	 * @param kafkaProperties consumer properties carrying the JDBC settings
	 * @param processTime     processing date the state belongs to
	 * @param desc            description of the state, e.g. {@code "start pause"}
	 */
	public static void saveState(Boolean flag,Properties kafkaProperties, String processTime,String desc) {
		// println keeps successive state messages on separate lines
		System.err.println("ProcessUtils状态,processTime:"+processTime+" desc: "+desc);
		if (Boolean.TRUE.equals(flag)) {
			JDBCHelper.updateState(kafkaProperties, processTime, desc);
		}
	}
}

类2：负责恢复读取消息

package org.apache.flink.streaming.connectors.kafka.internal;

import java.util.Properties;

/**
 * Background thread that polls the day-switch database until the switch for the given
 * processing date is reported as finished, and then asks the consumer thread to resume.
 *
 * @author hulb
 * @date 2019-10-12 16:54
 */
public class ResumeThread extends Thread {

	/** Pause between two database polls. */
	private static final long POLL_INTERVAL_MILLIS = 10000L;

	private final String processTime;
	private final Properties kafkaProperties;
	private final KafkaConsumerThread consumer;

	/**
	 * @param processTime     processing date whose day switch is awaited
	 * @param kafkaProperties consumer properties holding the query and the JDBC settings
	 * @param consumer        consumer thread to notify once the day switch is finished
	 */
	public ResumeThread(String processTime,Properties kafkaProperties,KafkaConsumerThread consumer){

		this.processTime = processTime;
		this.kafkaProperties = kafkaProperties;
		this.consumer= consumer;
	}


	/**
	 * Runs the Base64-decoded {@code database.query.sql} (with the processing date appended)
	 * every ten seconds until it reports a positive count, then flags the consumer as resumable.
	 * An interrupt ends the polling.
	 */
	@Override
	public void run(){
		String querySql = kafkaProperties.getProperty("database.query.sql");
		String sql = ProcessUtils.decode(querySql) + processTime + "'";
		while (true) {
			try {
				// condition 1: the day switch has finished
				// NOTE(review): a new connection is requested on every poll; confirm that
				// JDBCHelper.getAll releases it.
				Integer result = JDBCHelper.getAll(JDBCHelper.getConn(kafkaProperties), sql);
				// condition 2 (open question in the original): balance data synced to HBase
				if (result != null && result > 0) {
					consumer.setResumed(true);
					consumer.setResumedThreadStart(false);
					return;
				}
				Thread.sleep(POLL_INTERVAL_MILLIS);
			} catch (InterruptedException e) {
				// restore the flag and stop; continuing would ignore the interrupt request
				Thread.currentThread().interrupt();
				return;
			}
		}
	}


}

类3：改写后的 KafkaConsumerThread，在原有拉取循环中加入暂停与恢复消费的逻辑

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.connectors.kafka.internal;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.ratelimiting.FlinkConnectorRateLimiter;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.connectors.kafka.internals.ClosableBlockingQueue;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaCommitCallback;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionState;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionStateSentinel;
import org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetCommitCallback;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import org.slf4j.Logger;

import javax.annotation.Nonnull;

import java.util.*;
import java.util.concurrent.atomic.AtomicReference;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * The thread the runs the {@link KafkaConsumer}, connecting to the brokers and polling records.
 * The thread pushes the data into a {@link Handover} to be picked up by the fetcher that will
 * deserialize and emit the records.
 *
 * <p><b>IMPORTANT:</b> This thread must not be interrupted when attempting to shut it down.
 * The Kafka consumer code was found to not always handle interrupts well, and to even
 * deadlock in certain situations.
 *
 * <p>Implementation Note: This code is written to be reusable in later versions of the KafkaConsumer.
 * Because Kafka is not maintaining binary compatibility, we use a "call bridge" as an indirection
 * to the KafkaConsumer calls that change signature.
 */
@Internal
public class KafkaConsumerThread extends Thread {

	/**
	 * Logger for this consumer.
	 */
	private final Logger log;

	/**
	 * The handover of data and exceptions between the consumer thread and the task thread.
	 */
	private final Handover handover;

	/**
	 * The next offsets that the main thread should commit and the commit callback.
	 */
	private final AtomicReference<Tuple2<Map<TopicPartition, OffsetAndMetadata>, KafkaCommitCallback>> nextOffsetsToCommit;

	/**
	 * The configuration for the Kafka consumer.
	 */
	private final Properties kafkaProperties;

	/**
	 * The queue of unassigned partitions that we need to assign to the Kafka consumer.
	 */
	private final ClosableBlockingQueue<KafkaTopicPartitionState<TopicPartition>> unassignedPartitionsQueue;

	/**
	 * The indirections on KafkaConsumer methods, for cases where KafkaConsumer compatibility is broken.
	 */
	private final KafkaConsumerCallBridge09 consumerCallBridge;

	/**
	 * The maximum number of milliseconds to wait for a fetch batch.
	 */
	private final long pollTimeout;

	/**
	 * Flag whether to add Kafka's metrics to the Flink metrics.
	 */
	private final boolean useMetrics;

	/**
	 * @deprecated We should only be publishing to the {{@link #consumerMetricGroup}}.
	 * This is kept to retain compatibility for metrics.
	 **/
	@Deprecated
	private final MetricGroup subtaskMetricGroup;

	/**
	 * We get this from the outside to publish metrics.
	 */
	private final MetricGroup consumerMetricGroup;

	/**
	 * Reference to the Kafka consumer, once it is created.
	 */
	private volatile KafkaConsumer<byte[], byte[]> consumer;

	/**
	 * This lock is used to isolate the consumer for partition reassignment.
	 */
	private final Object consumerReassignmentLock;

	/**
	 * Indication if this consumer has any assigned partition.
	 */
	private boolean hasAssignedPartitions;

	/**
	 * Flag to indicate whether an external operation ({@link #setOffsetsToCommit(Map, KafkaCommitCallback)}
	 * or {@link #shutdown()}) had attempted to wakeup the consumer while it was isolated for partition reassignment.
	 */
	private volatile boolean hasBufferedWakeup;

	/**
	 * Flag to mark the main work loop as alive.
	 */
	private volatile boolean running;

	/**
	 * Flag tracking whether the latest commit request has completed.
	 */
	private volatile boolean commitInProgress;

	/**
	 * Ratelimiter.
	 */
	private FlinkConnectorRateLimiter rateLimiter;

	/**
	 * Whether consumption is currently paused. Set to true by the fetch loop when a pause
	 * trigger is seen and reset to false when consumption is resumed.
	 */
	private volatile boolean pausing;

	/** Returns whether a resume has been requested and not yet carried out by the fetch loop. */
	public boolean isResumed() {
		return resumed;
	}

	/** Requests (true) or clears (false) a resume; called by {@code ResumeThread} with true. */
	public void setResumed(boolean resumed) {
		this.resumed = resumed;
	}

	/** Resume request flag; the fetch loop resumes the partitions and clears it. */
	private volatile boolean resumed;

	/** Marks whether a {@code ResumeThread} is currently running for this consumer. */
	public void setResumedThreadStart(boolean resumedThreadStart) {
		this.resumedThreadStart = resumedThreadStart;
	}

	/** True while a {@code ResumeThread} has been started and has not reported back yet. */
	private volatile boolean resumedThreadStart;

	/**
	 * Whether the next offset commit is the first "checkpoint" after the pause began.
	 */
	private boolean firstCheckpointOnPause;

	/**
	 * Current processing date, e.g. on 20190506 only data dated 0506 is processed; once data
	 * with the accounting date 0507 shows up the consumer pauses, and the date is advanced by
	 * one day when consumption resumes.
	 */
	private volatile String processTime = "";
	//private static volatile AtomicInteger atomicIntegerProcessTime;
	// Switch read from the "event.pause.enable" property: evaluate pause triggers or not.
	private volatile Boolean pauseEnable;
	// Switch read from the "save.database.enable" property: persist state transitions or not.
	private volatile Boolean saveStateEnable;
	// True until the one-off start-up check in the fetch loop has run.
	private volatile Boolean firstCheck;
	// Control message that pauses consumption when received as a record value.
	// NOTE(review): the identifier is presumably a typo for PAUSE_MESSAGE.
	private static String PAUSE_MEAAGE = "{\"table\":\"PAUSE\",\"op_type\":\"I\",\"after\":{\"PAUSE\":\"PAUSE\"}}";

	/**
	 * Creates the (daemon) consumer thread and derives the initial pause/resume state
	 * from the consumer properties.
	 *
	 * @param log                       logger to use
	 * @param handover                  hand-over point towards the task thread
	 * @param kafkaProperties           Kafka configuration, also carrying the pause/resume settings
	 * @param unassignedPartitionsQueue queue of partitions still to be assigned
	 * @param consumerCallBridge        indirection for version-dependent KafkaConsumer calls
	 * @param threadName                name of this thread
	 * @param pollTimeout               maximum time in milliseconds to wait for a fetch batch
	 * @param useMetrics                whether Kafka metrics are forwarded to Flink
	 * @param consumerMetricGroup       metric group for consumer metrics
	 * @param subtaskMetricGroup        legacy metric group, kept for compatibility
	 * @param rateLimiter               optional rate limiter, may be {@code null}
	 */
	public KafkaConsumerThread(
		Logger log,
		Handover handover,
		Properties kafkaProperties,
		ClosableBlockingQueue<KafkaTopicPartitionState<TopicPartition>> unassignedPartitionsQueue,
		KafkaConsumerCallBridge09 consumerCallBridge,
		String threadName,
		long pollTimeout,
		boolean useMetrics,
		MetricGroup consumerMetricGroup,
		MetricGroup subtaskMetricGroup,
		FlinkConnectorRateLimiter rateLimiter) {

		super(threadName);
		setDaemon(true);

		// mandatory collaborators
		this.log = checkNotNull(log);
		this.handover = checkNotNull(handover);
		this.kafkaProperties = checkNotNull(kafkaProperties);
		this.consumerMetricGroup = checkNotNull(consumerMetricGroup);
		this.subtaskMetricGroup = checkNotNull(subtaskMetricGroup);
		this.consumerCallBridge = checkNotNull(consumerCallBridge);
		this.unassignedPartitionsQueue = checkNotNull(unassignedPartitionsQueue);

		// plain settings; a null rate limiter simply leaves rate limiting switched off
		this.pollTimeout = pollTimeout;
		this.useMetrics = useMetrics;
		this.rateLimiter = rateLimiter;

		// internal state of the fetch loop
		this.running = true;
		this.nextOffsetsToCommit = new AtomicReference<>();
		this.consumerReassignmentLock = new Object();

		// initial pause/resume state derived from the properties
		this.firstCheck = true;
		this.processTime = ProcessUtils.generateProcessTime(kafkaProperties);
		this.pauseEnable = ProcessUtils.pauseEnable(kafkaProperties);
		this.saveStateEnable = ProcessUtils.saveStateEnable(kafkaProperties);
	}

	// ------------------------------------------------------------------------

	/**
	 * Main work loop: creates the Kafka consumer, then repeatedly commits pending offsets,
	 * assigns newly discovered partitions, polls records, applies the pause/resume logic and
	 * hands the remaining records over to the task thread. The consumer and the handover are
	 * always closed when the loop ends.
	 */
	@Override
	public void run() {
		// early exit check
		if (!running) {
			return;
		}

		// this is the means to talk to FlinkKafkaConsumer's main thread
		final Handover handover = this.handover;

		// This method initializes the KafkaConsumer and guarantees it is torn down properly.
		// This is important, because the consumer has multi-threading issues,
		// including concurrent 'close()' calls.
		try {
			this.consumer = getConsumer(kafkaProperties);
		} catch (Throwable t) {
			handover.reportError(t);
			return;
		}

		// from here on, the consumer is guaranteed to be closed properly
		try {
			// register Kafka's very own metrics in Flink's metric reporters
			if (useMetrics) {
				// register Kafka metrics to Flink
				Map<MetricName, ? extends Metric> metrics = consumer.metrics();
				if (metrics == null) {
					// MapR's Kafka implementation returns null here.
					log.info("Consumer implementation does not support metrics");
				} else {
					// we have Kafka metrics, register them
					for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
						consumerMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));

						// TODO this metric is kept for compatibility purposes; should remove in the future
						subtaskMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
					}
				}
			}

			// early exit check
			if (!running) {
				return;
			}

			// the latest bulk of records, already filtered by the pause logic. May carry across
			// the loop if the thread is woken up from blocking on the handover
			ConsumerRecords<byte[], byte[]> records = null;

			// reused variable to hold found unassigned new partitions.
			// found partitions are not carried across loops using this variable;
			// they are carried across via re-adding them to the unassigned partitions queue
			List<KafkaTopicPartitionState<TopicPartition>> newPartitions;


			// main fetch loop
			while (running) {

				// check if there is something to commit
				if (!commitInProgress) {
					// get and reset the work-to-be committed, so we don't repeatedly commit the same
					final Tuple2<Map<TopicPartition, OffsetAndMetadata>, KafkaCommitCallback> commitOffsetsAndCallback =
						nextOffsetsToCommit.getAndSet(null);

					if (commitOffsetsAndCallback != null) {
						log.debug("Sending async offset commit request to Kafka broker");

						// also record that a commit is already in progress
						// the order here matters! first set the flag, then send the commit command.
						commitInProgress = true;
						if (pausing && firstCheckpointOnPause) {
							commitFirstCheckpointAfterPause(commitOffsetsAndCallback);
						} else {
							consumer.commitAsync(commitOffsetsAndCallback.f0, new CommitCallback(commitOffsetsAndCallback.f1));
						}
					}
				}

				try {
					if (hasAssignedPartitions) {
						newPartitions = unassignedPartitionsQueue.pollBatch();
					} else {
						// if no assigned partitions block until we get at least one
						// instead of hot spinning this loop. We rely on a fact that
						// unassignedPartitionsQueue will be closed on a shutdown, so
						// we don't block indefinitely
						newPartitions = unassignedPartitionsQueue.getBatchBlocking();
					}
					if (newPartitions != null) {
						reassignPartitions(newPartitions);
					}
				} catch (AbortedReassignmentException e) {
					continue;
				}

				if (!hasAssignedPartitions) {
					// Without assigned partitions KafkaConsumer.poll will throw an exception
					continue;
				}

				if (firstCheck) {
					// one-off start-up check: if today's day switch has started but is not
					// finished yet, pause right away
					System.out.println("进行是否暂停判断");
					if (ProcessUtils.startCheck(processTime, kafkaProperties)) {
						System.out.println("暂停==============pauseTopicPartitions");
						consumerCallBridge.pauseTopicPartitions(consumer, consumer.assignment());
						if(!this.resumedThreadStart){
							System.out.println("启动监听日切thread");
							ResumeThread resumeThread = new ResumeThread(processTime, kafkaProperties, this);
							resumeThread.start();
							this.resumedThreadStart=true;
						}
					}
					// only needed once after a (re)start
					firstCheck = false;
				}

				// get the next batch of records, unless we did not manage to hand the old batch over.
				// The pause filter runs on freshly polled batches only: filtering a retried batch
				// again while pausing would discard records that were already accepted.
				if (records == null) {
					final ConsumerRecords<byte[], byte[]> polled;
					try {
						polled = getRecordsFromKafka();
					} catch (WakeupException we) {
						continue;
					}
					records = applyPauseFilter(polled);
				}

				if (resumed) {
					processTime = ProcessUtils.increaseProcessTime(processTime);
					System.out.println("增加一天后的新的processTime日期: " + processTime);
					ProcessUtils.saveState(saveStateEnable, kafkaProperties, processTime, "start resume");
					consumerCallBridge.resumeTopicPartitions(consumer, consumer.assignment());
					ProcessUtils.saveState(saveStateEnable, kafkaProperties, processTime, "resumed");
					firstCheckpointOnPause = false;
					// no longer paused
					pausing = false;
					// clear the request so the resume is not executed twice
					resumed = false;
				}

				try {
					handover.produce(records);
					records = null;
				} catch (Handover.WakeupException e) {
					// fall through the loop
				}
			}
			// end main fetch loop
		} catch (Throwable t) {
			// let the main thread know and exit
			// it may be that this exception comes because the main thread closed the handover, in
			// which case the below reporting is irrelevant, but does not hurt either
			handover.reportError(t);
		} finally {
			// make sure the handover is closed if it is not already closed or has an error
			handover.close();

			// If a ratelimiter was created, make sure it's closed.
			if (rateLimiter != null) {
				rateLimiter.close();
			}

			// make sure the KafkaConsumer is closed
			try {
				consumer.close();
			} catch (Throwable t) {
				log.warn("Error while closing Kafka consumer", t);
			}
		}
	}

	/**
	 * Synchronously commits the offsets of the first checkpoint completed after the pause
	 * began, notifies the commit callback, and records a checkpoint state row for every
	 * assigned partition.
	 *
	 * <p>{@code setOffsetsToCommit()} and {@code shutdown()} call {@code wakeup()} on the
	 * consumer, which makes the next blocking call, possibly this {@code commitSync}, throw a
	 * {@link WakeupException}. The commit is retried in that case unless the thread is shutting down.
	 */
	private void commitFirstCheckpointAfterPause(
		Tuple2<Map<TopicPartition, OffsetAndMetadata>, KafkaCommitCallback> commitOffsetsAndCallback) {

		System.out.println("进行暂停后的第一次offset提交"+ProcessUtils.getNowTime());
		try {
			boolean committed = false;
			while (!committed) {
				try {
					consumer.commitSync(commitOffsetsAndCallback.f0);
					committed = true;
				} catch (WakeupException e) {
					// throwing cleared the pending wakeup; give up only when shutting down
					if (!running) {
						return;
					}
				}
			}
		} finally {
			// the flag must never stay set, otherwise no later commit would be attempted
			commitInProgress = false;
		}
		commitOffsetsAndCallback.f1.onSuccess();

		// TODO write the topic into the Oracle state table: date, topic name, state (checkpointed)
		// TODO restart the application once all subscribed topics were checkpointed after the pause
		for (TopicPartition topicPartition : consumer.assignment()) {
			System.out.println("进行暂停后的第一次offset提交,topic:"+topicPartition.topic()+ProcessUtils.getNowTime());
			JDBCHelper.saveCheckpointState(kafkaProperties, processTime, "0", topicPartition.topic(), "1");
		}
		// later checkpoints during the same pause must not write the state again
		firstCheckpointOnPause = false;
	}

	/**
	 * Inspects a freshly polled batch for pause triggers and returns the records that may be
	 * handed over: everything while not pausing, and only the pause control messages once
	 * pausing has started.
	 */
	private ConsumerRecords<byte[], byte[]> applyPauseFilter(ConsumerRecords<byte[], byte[]> polled) {
		final SimpleStringSchema valueSchema = new SimpleStringSchema();
		final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> pendingRecords = new HashMap<>();

		for (ConsumerRecord<byte[], byte[]> record : polled) {
			// a pause control message pauses consumption but is itself still passed downstream
			boolean pauseMessage = false;

			//TODO only evaluate the triggers between 10 p.m. and 2 a.m. of the following day
			if (pauseEnable && ProcessUtils.timeFlag()) {
				// records without a value (null payload) cannot be a trigger
				final String value = record.value() == null ? null : valueSchema.deserialize(record.value());
				if (PAUSE_MEAAGE.equals(value)) {
					pausing = true;
					pauseMessage = true;
				} else if (value != null && belongsToNextDay(value)) {
					pausing = true;
				}
			}

			if (pausing && !resumedThreadStart) {
				beginPause();
			}

			// NOTE(review): records held back from here on were already fetched and nothing in
			// this class seeks the consumer back to them; unless the job is restarted from a
			// checkpoint they appear to be skipped after the resume. Confirm the intended behaviour.
			if (pauseMessage || !pausing) {
				pendingRecords
					.computeIfAbsent(new TopicPartition(record.topic(), record.partition()), tp -> new ArrayList<>())
					.add(record);
			}
		}

		// while pausing, an empty result must replace the polled batch so that held-back
		// records are not passed downstream
		if (pausing || !pendingRecords.isEmpty()) {
			return new ConsumerRecords<>(pendingRecords);
		}
		return polled;
	}

	/**
	 * Tells whether a record value belongs to the configured table and carries a date one day
	 * after the current processing date. Values that cannot be inspected never trigger a pause.
	 */
	private boolean belongsToNextDay(String value) {
		try {
			JSONObject jsonObject = JSONObject.parseObject(value);
			String tableName = jsonObject.getString("table");
			String chTableName = kafkaProperties.getProperty("table.name.ch");
			// only records of the configured table are examined
			if (chTableName == null || !chTableName.equals(tableName)) {
				return false;
			}
			String dataObject = kafkaProperties.getProperty("table.data.object");
			String timeColumn = kafkaProperties.getProperty("table.time.column");
			String dataTime = jsonObject
				.getJSONObject(dataObject)
				.getString(timeColumn);
			return ProcessUtils.compareProcessTimeAndDataTime(dataTime, processTime);
		} catch (Exception e) {
			// the record is not dropped: it simply does not act as a trigger
			log.debug("Could not inspect record value for the pause check", e);
			return false;
		}
	}

	/**
	 * Pauses all assigned partitions and starts the thread that waits for the end of the day switch.
	 */
	private void beginPause() {
		ProcessUtils.saveState(saveStateEnable, kafkaProperties, processTime, "start pause");
		consumerCallBridge.pauseTopicPartitions(consumer, consumer.assignment());
		firstCheckpointOnPause = true;
		// the resume thread sets the resume request once the day switch is reported as done
		System.out.println("启动监听日切thread");
		ResumeThread resumeThread = new ResumeThread(processTime, kafkaProperties, this);
		resumeThread.start();
		ProcessUtils.saveState(saveStateEnable, kafkaProperties, processTime, "start waitCompute");
		this.resumedThreadStart=true;
	}

	/**
	 * Shuts this thread down, waking up the thread gracefully if blocked (without Thread.interrupt() calls).
	 *
	 * <p>Clears the {@code running} flag first so that the fetch loop exits once it has been
	 * woken up from whichever blocking call it is in (partition queue, handover or poll).
	 */
	public void shutdown() {
		running = false;

		// wake up all blocking calls on the queue
		unassignedPartitionsQueue.close();

		// We cannot call close() on the KafkaConsumer, because it will actually throw
		// an exception if a concurrent call is in progress

		// this wakes up the consumer if it is blocked handing over records
		handover.wakeupProducer();

		// this wakes up the consumer if it is blocked in a kafka poll
		synchronized (consumerReassignmentLock) {
			if (consumer != null) {
				consumer.wakeup();
			} else {
				// the consumer is currently isolated for partition reassignment;
				// set this flag so that the wakeup state is restored once the reassignment is complete
				hasBufferedWakeup = true;
			}
		}

		// If a ratelimiter was created, make sure it's closed.
		// (run() closes it as well in its finally block.)
		if (rateLimiter != null) {
			rateLimiter.close();
		}
	}

	/**
	 * Tells this thread to commit a set of offsets. This method does not block, the committing
	 * operation will happen asynchronously.
	 *
	 * <p>Only one commit operation may be pending at any time. If the committing takes longer than
	 * the frequency with which this method is called, then some commits may be skipped due to being
	 * superseded by newer ones.
	 *
	 * <p>The offsets are only stored here; the fetch loop in {@code run()} picks them up and
	 * sends the commit. Both the handover and the consumer are woken up so that this happens soon.
	 *
	 * @param offsetsToCommit The offsets to commit
	 * @param commitCallback  callback when Kafka commit completes
	 */
	void setOffsetsToCommit(
		Map<TopicPartition, OffsetAndMetadata> offsetsToCommit,
		@Nonnull KafkaCommitCallback commitCallback) {

		// record the work to be committed by the main consumer thread and make sure the consumer notices that
		// (a non-null previous value means an older request had not been picked up yet and is replaced)
		if (nextOffsetsToCommit.getAndSet(Tuple2.of(offsetsToCommit, commitCallback)) != null) {
			log.warn("Committing offsets to Kafka takes longer than the checkpoint interval. " +
				"Skipping commit of previous offsets because newer complete checkpoint offsets are available. " +
				"This does not compromise Flink's checkpoint integrity.");
		}

		// if the consumer is blocked in a poll() or handover operation, wake it up to commit soon
		handover.wakeupProducer();

		synchronized (consumerReassignmentLock) {
			if (consumer != null) {
				consumer.wakeup();
			} else {
				// the consumer is currently isolated for partition reassignment;
				// set this flag so that the wakeup state is restored once the reassignment is complete
				hasBufferedWakeup = true;
			}
		}
	}

	// ------------------------------------------------------------------------

	/**
	 * Reestablishes the assigned partitions for the consumer.
	 * The reassigned partitions consists of the provided new partitions and whatever partitions
	 * was already previously assigned to the consumer.
	 *
	 * <p>The reassignment process is protected against wakeup calls, so that after
	 * this method returns, the consumer is either untouched or completely reassigned
	 * with the correct offset positions.
	 *
	 * <p>If the consumer was already woken-up prior to a reassignment resulting in an
	 * interruption any time during the reassignment, the consumer is guaranteed
	 * to roll back as if it was untouched. On the other hand, if there was an attempt
	 * to wakeup the consumer during the reassignment, the wakeup call is "buffered"
	 * until the reassignment completes.
	 *
	 * <p>This method is exposed for testing purposes.
	 *
	 * @param newPartitions partitions to add to the current assignment; an empty list is a no-op
	 * @throws Exception an {@code AbortedReassignmentException} when a wakeup interrupted the
	 *         reassignment; the new partitions have then been put back into the unassigned queue
	 */
	@VisibleForTesting
	void reassignPartitions(List<KafkaTopicPartitionState<TopicPartition>> newPartitions) throws Exception {
		if (newPartitions.size() == 0) {
			return;
		}
		// NOTE: the flag is set before the reassignment is attempted and is not reset on abort
		hasAssignedPartitions = true;
		boolean reassignmentStarted = false;

		// since the reassignment may introduce several Kafka blocking calls that cannot be interrupted,
		// the consumer needs to be isolated from external wakeup calls in setOffsetsToCommit() and shutdown()
		// until the reassignment is complete.
		final KafkaConsumer<byte[], byte[]> consumerTmp;
		synchronized (consumerReassignmentLock) {
			consumerTmp = this.consumer;
			this.consumer = null;
		}

		// positions of the partitions assigned so far, needed for seeking back and for a roll back
		final Map<TopicPartition, Long> oldPartitionAssignmentsToPosition = new HashMap<>();
		try {
			for (TopicPartition oldPartition : consumerTmp.assignment()) {
				oldPartitionAssignmentsToPosition.put(oldPartition, consumerTmp.position(oldPartition));
			}

			final List<TopicPartition> newPartitionAssignments =
				new ArrayList<>(newPartitions.size() + oldPartitionAssignmentsToPosition.size());
			newPartitionAssignments.addAll(oldPartitionAssignmentsToPosition.keySet());
			newPartitionAssignments.addAll(convertKafkaPartitions(newPartitions));

			// reassign with the new partitions
			consumerCallBridge.assignPartitions(consumerTmp, newPartitionAssignments);
			reassignmentStarted = true;

			// old partitions should be seeked to their previous position
			for (Map.Entry<TopicPartition, Long> oldPartitionToPosition : oldPartitionAssignmentsToPosition.entrySet()) {
				consumerTmp.seek(oldPartitionToPosition.getKey(), oldPartitionToPosition.getValue());
			}

			// offsets in the state of new partitions may still be placeholder sentinel values if we are:
			//   (1) starting fresh,
			//   (2) checkpoint / savepoint state we were restored with had not completely
			//       been replaced with actual offset values yet, or
			//   (3) the partition was newly discovered after startup;
			// replace those with actual offsets, according to what the sentinel value represent.
			for (KafkaTopicPartitionState<TopicPartition> newPartitionState : newPartitions) {
				if (newPartitionState.getOffset() == KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET) {
					consumerCallBridge.seekPartitionToBeginning(consumerTmp, newPartitionState.getKafkaPartitionHandle());
					newPartitionState.setOffset(consumerTmp.position(newPartitionState.getKafkaPartitionHandle()) - 1);
				} else if (newPartitionState.getOffset() == KafkaTopicPartitionStateSentinel.LATEST_OFFSET) {
					consumerCallBridge.seekPartitionToEnd(consumerTmp, newPartitionState.getKafkaPartitionHandle());
					newPartitionState.setOffset(consumerTmp.position(newPartitionState.getKafkaPartitionHandle()) - 1);
				} else if (newPartitionState.getOffset() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
					// the KafkaConsumer by default will automatically seek the consumer position
					// to the committed group offset, so we do not need to do it.

					newPartitionState.setOffset(consumerTmp.position(newPartitionState.getKafkaPartitionHandle()) - 1);
				} else {
					// a concrete offset is stored: continue with the record right after it
					consumerTmp.seek(newPartitionState.getKafkaPartitionHandle(), newPartitionState.getOffset() + 1);
				}
			}
		} catch (WakeupException e) {
			// a WakeupException may be thrown if the consumer was invoked wakeup()
			// before it was isolated for the reassignment. In this case, we abort the
			// reassignment and just re-expose the original consumer.

			synchronized (consumerReassignmentLock) {
				this.consumer = consumerTmp;

				// if reassignment had already started and affected the consumer,
				// we do a full roll back so that it is as if it was left untouched
				if (reassignmentStarted) {
					consumerCallBridge.assignPartitions(
						this.consumer, new ArrayList<>(oldPartitionAssignmentsToPosition.keySet()));

					for (Map.Entry<TopicPartition, Long> oldPartitionToPosition : oldPartitionAssignmentsToPosition.entrySet()) {
						this.consumer.seek(oldPartitionToPosition.getKey(), oldPartitionToPosition.getValue());
					}
				}

				// no need to restore the wakeup state in this case,
				// since only the last wakeup call is effective anyways
				hasBufferedWakeup = false;

				// re-add all new partitions back to the unassigned partitions queue to be picked up again
				for (KafkaTopicPartitionState<TopicPartition> newPartition : newPartitions) {
					unassignedPartitionsQueue.add(newPartition);
				}

				// this signals the main fetch loop to continue through the loop
				throw new AbortedReassignmentException();
			}
		}

		// reassignment complete; expose the reassigned consumer
		synchronized (consumerReassignmentLock) {
			this.consumer = consumerTmp;

			// restore wakeup state for the consumer if necessary
			if (hasBufferedWakeup) {
				this.consumer.wakeup();
				hasBufferedWakeup = false;
			}
		}
	}

	/**
	 * Creates the Kafka consumer used by this thread; a separate method so that tests can
	 * supply their own consumer.
	 *
	 * @param kafkaProperties configuration passed to the consumer
	 * @return a new consumer instance
	 */
	@VisibleForTesting
	KafkaConsumer<byte[], byte[]> getConsumer(Properties kafkaProperties) {
		final KafkaConsumer<byte[], byte[]> created = new KafkaConsumer<>(kafkaProperties);
		return created;
	}

	/**
	 * @return the rate limiter handed to the constructor, or {@code null} when none was given
	 */
	@VisibleForTesting
	FlinkConnectorRateLimiter getRateLimiter() {
		return this.rateLimiter;
	}

	// -----------------------------------------------------------------------
	// Rate limiting methods
	// -----------------------------------------------------------------------

	/**
	 * Sums up the key and value sizes of all records in a batch. Missing keys and missing
	 * values (null payloads) contribute nothing.
	 *
	 * @param records List of ConsumerRecords.
	 * @return Total batch size in bytes, including key and value.
	 */
	private int getRecordBatchSize(ConsumerRecords<byte[], byte[]> records) {
		int recordBatchSizeBytes = 0;
		for (ConsumerRecord<byte[], byte[]> record : records) {
			// Null is an allowed value for the key
			if (record.key() != null) {
				recordBatchSizeBytes += record.key().length;
			}
			// ... and for the value as well (e.g. tombstone records)
			if (record.value() != null) {
				recordBatchSizeBytes += record.value().length;
			}
		}
		return recordBatchSizeBytes;
	}

	/**
	 * Polls the next batch from Kafka, waiting at most {@code pollTimeout} milliseconds. With a
	 * rate limiter configured, the size of the fetched batch is acquired from it before the
	 * batch is returned.
	 *
	 * @return ConsumerRecords
	 */
	@VisibleForTesting
	protected ConsumerRecords<byte[], byte[]> getRecordsFromKafka() {
		final ConsumerRecords<byte[], byte[]> polled = consumer.poll(pollTimeout);
		if (rateLimiter == null) {
			return polled;
		}
		rateLimiter.acquire(getRecordBatchSize(polled));
		return polled;
	}


	// ------------------------------------------------------------------------
	//  Utilities
	// ------------------------------------------------------------------------

	/** Extracts the Kafka partition handles from the given partition states, keeping their order. */
	private static List<TopicPartition> convertKafkaPartitions(List<KafkaTopicPartitionState<TopicPartition>> partitions) {
		final ArrayList<TopicPartition> handles = new ArrayList<>(partitions.size());
		partitions.forEach(state -> handles.add(state.getKafkaPartitionHandle()));
		return handles;
	}

	/**
	 * Adapter that clears the commit-in-progress flag when an asynchronous commit finishes and
	 * forwards the outcome to Flink's commit callback.
	 */
	private class CommitCallback implements OffsetCommitCallback {

		private final KafkaCommitCallback internalCommitCallback;

		CommitCallback(KafkaCommitCallback internalCommitCallback) {
			this.internalCommitCallback = checkNotNull(internalCommitCallback);
		}

		@Override
		public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception ex) {
			// whatever the outcome, the next commit request may now be sent
			commitInProgress = false;

			if (ex == null) {
				internalCommitCallback.onSuccess();
				return;
			}

			log.warn("Committing offsets to Kafka failed. This does not compromise Flink's checkpoints.", ex);
			internalCommitCallback.onException(ex);
		}
	}

	/**
	 * Callback-style helper that clears the pausing flag on completion.
	 * NOTE(review): nothing in this class instantiates it; it looks like a leftover.
	 */
	private class ResumeCallback extends Thread {
		private Properties kafkaProperties;
		private String processTime;

		ResumeCallback() {
			super();
		}

		public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception ex) {
			KafkaConsumerThread.this.pausing = false;
		}
	}

	/**
	 * Signal exception thrown by {@code reassignPartitions} when a wakeup call that preceded
	 * the reassignment aborted it; the main loop reacts by continuing with its next iteration.
	 */
	private static class AbortedReassignmentException extends Exception {

		private static final long serialVersionUID = 1L;
	}
}

类4：JDBC 工具类

package org.apache.flink.streaming.connectors.kafka.internal;

import java.sql.*;
import java.util.Properties;

/**
 * Static JDBC helpers: opening connections from configuration properties, running a
 * single-value query, and inserting status rows.
 *
 * <p>Error handling is best-effort throughout: failures are printed and swallowed, and the
 * methods fall back to {@code null} (connections) or {@code 0} (counts) instead of throwing.
 *
 * @author hulb
 * @date 2019-05-07 14:35
 */
public class JDBCHelper {

	/**
	 * Loads the JDBC driver and opens a connection.
	 *
	 * @param url      JDBC connection URL
	 * @param driver   fully qualified driver class name
	 * @param userName database user
	 * @param password database password
	 * @return the open connection, or {@code null} if the driver class could not be loaded
	 *         or the connection could not be established
	 */
	private static Connection getConn(String url, String driver, String userName, String password) {
		Connection conn = null;
		try {
			Class.forName(driver);
			conn = DriverManager.getConnection(url, userName, password);
		} catch (ClassNotFoundException | SQLException e) {
			e.printStackTrace();
		}
		return conn;
	}

	/**
	 * Opens a connection to the system database of the daily cut-over ("day switch") process,
	 * configured through the {@code database.url}, {@code database.driver},
	 * {@code database.username} and {@code database.password} properties.
	 *
	 * @param kafkaProperties properties holding the connection settings
	 * @return the open connection, or {@code null} if it could not be established
	 */
	public static Connection getConn(Properties kafkaProperties) {
		String url = kafkaProperties.getProperty("database.url");
		String driver = kafkaProperties.getProperty("database.driver");
		String userName = kafkaProperties.getProperty("database.username");
		String password = kafkaProperties.getProperty("database.password");
		return getConn(url, driver, userName, password);
	}

	/**
	 * Opens a connection to the user-defined database that stores state information,
	 * configured through the {@code database.state.url}, {@code database.state.driver},
	 * {@code database.state.username} and {@code database.state.password} properties.
	 *
	 * @param kafkaProperties properties holding the connection settings
	 * @return the open connection, or {@code null} if it could not be established
	 */
	public static Connection getStateConn(Properties kafkaProperties) {
		String url = kafkaProperties.getProperty("database.state.url");
		String driver = kafkaProperties.getProperty("database.state.driver");
		String userName = kafkaProperties.getProperty("database.state.username");
		String password = kafkaProperties.getProperty("database.state.password");
		return getConn(url, driver, userName, password);
	}


	/**
	 * Runs a query and returns the first column of its last row as an integer
	 * (intended for {@code COUNT}-style queries).
	 *
	 * <p>The supplied connection is always closed before this method returns.
	 *
	 * @param conn     connection to run the query on; {@code null} yields {@code 0}
	 * @param querySql query to execute
	 * @return the value read, or {@code 0} if the query returned no rows, failed,
	 *         or no connection was available
	 */
	public static Integer getAll(Connection conn, String querySql) {
		Integer count = 0;
		if (conn == null) {
			// getConn(...) returns null on failure; treat it like any other query failure.
			System.err.println("No database connection available, query skipped: " + querySql);
			return count;
		}
		// Statement and result set are closed by try-with-resources before the connection is.
		try (PreparedStatement pstmt = conn.prepareStatement(querySql);
			 ResultSet rs = pstmt.executeQuery()) {
			while (rs.next()) {
				count = rs.getInt(1);
			}
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(conn);
		}
		return count;
	}

	/**
	 * Records the current processing state by inserting one row into the table named by
	 * the {@code database.state.table} property. It is meant to be called when the pause
	 * starts and again when processing resumes; every call inserts a new row.
	 *
	 * @param kafkaProperties properties holding the state database settings
	 * @param processTime     processing date the status refers to
	 * @param status          status description stored in {@code run_desc}
	 * @return number of inserted rows, or {@code 0} on failure
	 */
	public static Integer updateState(Properties kafkaProperties,
									  String processTime, String status) {

		String statTable = kafkaProperties.getProperty("database.state.table");
		// Only the table name is formatted in; values are bound so quotes cannot break the SQL.
		String sql = String.format("INSERT INTO %s(create_time,process_time,run_desc) VALUES(?,?,?)",
			statTable);
		System.out.println(sql);
		return runUpdate(kafkaProperties, sql, ProcessUtils.getNowTime(), processTime, status);
	}

	/**
	 * Writes the checkpoint result for a topic into the table named by the
	 * {@code database.checkpoint.table} property (default {@code switch_checkpoint_status}).
	 * The consumer group is taken from the {@code group.id} property (default {@code group_id}).
	 *
	 * @param kafkaProperties     properties holding the state database settings
	 * @param processTime         processing date stored in {@code switch_date}
	 * @param status              not used by this method
	 * @param topic               topic the checkpoint refers to
	 * @param checkpointCompleted value stored in {@code checkpoint_completed}
	 * @return number of inserted rows, or {@code 0} on failure
	 */
	public static Integer saveCheckpointState(Properties kafkaProperties,
											  String processTime, String status, String topic, String checkpointCompleted) {
		System.out.println("将topic是否进行checkpoint写入到oracle中");
		String statTable = kafkaProperties.getProperty("database.checkpoint.table", "switch_checkpoint_status");
		String groupId = kafkaProperties.getProperty("group.id", "group_id");
		// Only the table name is formatted in; values are bound so quotes cannot break the SQL.
		String sql = String.format("INSERT INTO %s(switch_date,topic,group_id,checkpoint_completed) VALUES(?,?,?,?)",
			statTable);
		System.out.println(sql);
		return runUpdate(kafkaProperties, sql, processTime, topic, groupId, checkpointCompleted);
	}



	/**
	 * Executes an {@code UPDATE} or {@code INSERT} statement against the state database
	 * inside its own transaction.
	 *
	 * @param kafkaProperties properties holding the state database settings
	 * @param sql             complete statement to execute
	 * @return number of affected rows, or {@code 0} if the statement failed or no
	 *         connection was available
	 */
	public static Integer executeUpdate(Properties kafkaProperties, String sql) {
		return runUpdate(kafkaProperties, sql);
	}

	/**
	 * Executes a statement with optional bound string parameters on a fresh state-database
	 * connection, committing on success and rolling back on failure.
	 */
	private static int runUpdate(Properties kafkaProperties, String sql, String... params) {
		Connection conn = null;
		try {
			conn = getStateConn(kafkaProperties);
			if (conn == null) {
				System.err.println("No state database connection available, statement skipped: " + sql);
				return 0;
			}
			conn.setAutoCommit(false);
			int affected;
			try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
				for (int i = 0; i < params.length; i++) {
					// String.valueOf keeps the earlier behavior of storing the text "null"
					// for a null value instead of switching to SQL NULL.
					pstmt.setString(i + 1, String.valueOf(params[i]));
				}
				affected = pstmt.executeUpdate();
			}
			conn.commit();
			// Reported only once the commit has actually gone through.
			System.out.println("执行成功:" + sql);
			return affected;
		} catch (Exception e) {
			e.printStackTrace();
			rollbackQuietly(conn);
			return 0;
		} finally {
			closeQuietly(conn);
		}
	}

	/** Rolls back the connection's current transaction, printing (not throwing) any failure. */
	private static void rollbackQuietly(Connection conn) {
		if (conn == null) {
			return;
		}
		try {
			conn.rollback();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Closes the connection if present, printing (not throwing) any failure. */
	private static void closeQuietly(Connection conn) {
		if (conn == null) {
			return;
		}
		try {
			conn.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

}

唐予之_

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
改写flink kafka consumer实现自定义读取消息及控制

背景：flink1.8实现暂停消费kafka并能接受到通知，继续消费kafka需求描述：实现思路类1：package org.apache.flink.streaming.connectors.kafka.internal;import org.apache.kafka.clients.producer.KafkaProducer;import org.apache.kafka.clients.producer.ProducerConfig;import org.apache.kafk
复制链接

扫一扫

专栏目录