We need Debezium to capture data from Oracle in production, and my rule is that if I am going to rely on a tool, I should understand how it works. So let's dig into the relevant Debezium source code.
Let's get started.
First, we need a small driver program through which we can find Debezium's execution entry point.
The relevant pom dependencies:
<dependency>
    <groupId>io.debezium</groupId>
    <artifactId>debezium-api</artifactId>
    <version>${version.debezium}</version>
</dependency>
<dependency>
    <groupId>io.debezium</groupId>
    <artifactId>debezium-connector-mysql</artifactId>
    <version>${version.debezium}</version>
</dependency>
<dependency>
    <groupId>io.debezium</groupId>
    <artifactId>debezium-embedded</artifactId>
    <version>${version.debezium}</version>
</dependency>
<dependency>
    <groupId>io.debezium</groupId>
    <artifactId>debezium-connector-oracle</artifactId>
    <version>${version.debezium}</version>
</dependency>
<dependency>
    <groupId>com.oracle.database.jdbc</groupId>
    <artifactId>ojdbc8</artifactId>
    <version>21.1.0.0</version>
</dependency>
The corresponding driver code:
package com.zyh.debezium.converter;

import io.debezium.engine.ChangeEvent;
import io.debezium.engine.DebeziumEngine;
import io.debezium.engine.format.Json;
import io.debezium.relational.history.FileDatabaseHistory;

import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Capture Oracle change data with Debezium.
 * @author xxxx
 */
public class OracleCdcByDebezium {

    public static void main(String[] args) {
        // 1. Build the configuration
        Properties props = genProps();
        // 2. Build the engine together with the record-handling logic
        DebeziumEngine<ChangeEvent<String, String>> engine = engineBuild(props);
        // 3. Run it
        runSoftware(engine);
    }

    // Build the configuration for connecting to Oracle
    private static Properties genProps() {
        Properties props = new Properties();
        props.setProperty("name", "oracle-engine-0033");
        props.setProperty("connector.class", "io.debezium.connector.oracle.OracleConnector");
        props.setProperty("offset.storage", "org.apache.kafka.connect.storage.FileOffsetBackingStore");
        // File in which offsets are stored
        props.setProperty("offset.storage.file.filename", "/Users/xxxxx/IdeaProjects/debezium-datetime-converter/oracle-offset.txt");
        // Interval at which offsets are flushed to disk
        props.setProperty("offset.flush.interval.ms", "6000");
        // Database connection settings
        props.setProperty("database.hostname", "xxx");
        props.setProperty("database.port", "1521");
        props.setProperty("database.user", "xxxx");
        props.setProperty("database.password", "xxxx");
        props.setProperty("table.include.list", "xxxxxx");
        props.setProperty("database.history", FileDatabaseHistory.class.getCanonicalName());
        props.setProperty("database.history.file.filename", "/Users/xxxxx/IdeaProjects/debezium-datetime-converter/oracle-history.txt");
        // Must be unique, so change this value before every fresh run
        props.setProperty("database.server.name", "my-oracle-connector-0023");
        // Instance name when running against a CDB
        props.setProperty("database.dbname", "easdb2");
        // Whether to include schema information in the output
        props.setProperty("key.converter.schemas.enable", "false");
        props.setProperty("value.converter.schemas.enable", "false");
        props.setProperty("database.serverTimezone", "UTC"); // time zone
        props.setProperty("log.mining.strategy", "online_catalog");
        props.setProperty("database.history.store.only.captured.tables.ddl", "true");
        props.setProperty("log.mining.session.max.ms", "10000");
        props.setProperty("log.mining.transaction.retention.hours", "1");
        props.setProperty("time.precision.mode", "connect");
        props.setProperty("decimal.handling.mode", "string");
        // Kafka-based history storage (alternative to the file-based history above)
        /*props.setProperty("database.history.kafka.bootstrap.servers", "xxxxx:9092");
        props.setProperty("database.history.kafka.topic", "oracle.history");*/
        return props;
    }

    // Start the engine
    public static void runSoftware(DebeziumEngine<ChangeEvent<String, String>> engine) {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.execute(engine);
    }

    // Build the engine
    public static DebeziumEngine<ChangeEvent<String, String>> engineBuild(Properties props) {
        // Build a DebeziumEngine that emits JSON-formatted change events
        DebeziumEngine<ChangeEvent<String, String>> engine =
                DebeziumEngine
                        .create(Json.class)
                        .using(props)
                        .notifying(record -> {
                            // Each record carries the operation type (insert/update/delete) and the row data
                            System.out.println("record.key() = " + record.key());
                            System.out.println("record.value() = " + record.value());
                        })
                        .using((success, message, error) -> {
                            // Strongly recommended: this completion callback surfaces error details
                            if (!success && error != null) {
                                System.out.println("----------error------");
                                System.out.println(message);
                                error.printStackTrace();
                            }
                        })
                        .build();
        return engine;
    }
}
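One caveat: runSoftware above starts the engine but never stops it. Since DebeziumEngine implements Closeable, a graceful shutdown is easy to add. A minimal sketch (additionally requires java.io.IOException and java.util.concurrent.TimeUnit imports):

// Minimal shutdown sketch for the demo above: close the engine and executor on JVM exit
public static void runSoftware(DebeziumEngine<ChangeEvent<String, String>> engine) {
    ExecutorService executor = Executors.newSingleThreadExecutor();
    executor.execute(engine);
    Runtime.getRuntime().addShutdownHook(new Thread(() -> {
        try {
            engine.close(); // asks the engine to stop polling; its run() loop then commits final offsets
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        executor.shutdown();
        try {
            executor.awaitTermination(60, TimeUnit.SECONDS);
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }));
}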
Looking at the DebeziumEngine type, we see it is an interface extending the Runnable and Closeable interfaces.
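For reference, here is a condensed view of that interface (trimmed from io.debezium.engine.DebeziumEngine; only the members this article uses are shown, bodies elided):

// Condensed from io.debezium.engine.DebeziumEngine; most members omitted
public interface DebeziumEngine<R> extends Runnable, Closeable {

    // Entry point used in the demo above: pick a serialization format such as Json.class
    static <T> Builder<ChangeEvent<T, T>> create(Class<? extends SerializationFormat<T>> format) { /* ... */ }

    interface Builder<R> {
        Builder<R> using(Properties config);
        Builder<R> notifying(Consumer<R> consumer);              // per-record handler
        Builder<R> using(CompletionCallback completionCallback); // invoked once when the engine stops
        Builder<R> using(ConnectorCallback connectorCallback);   // lifecycle notifications
        DebeziumEngine<R> build();
    }
}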
Scrolling down through the implementations, we find EmbeddedEngine, the embedded implementation of this interface.
We will focus on EmbeddedEngine's run method.
The full (decompiled) code is as follows:
public void run() {
    // Only one thread may run the engine; record the current thread as the owner
    if (this.runningThread.compareAndSet((Object) null, Thread.currentThread())) {
        String engineName = this.config.getString(ENGINE_NAME);
        String connectorClassName = this.config.getString(CONNECTOR_CLASS);
        Optional<io.debezium.engine.DebeziumEngine.ConnectorCallback> connectorCallback = Optional.ofNullable(this.connectorCallback);
        this.latch.countUp();
        try {
            // Validate the configuration; abort if it is invalid
            Configuration var10000 = this.config;
            Set var10001 = CONNECTOR_FIELDS;
            Logger var10002 = LOGGER;
            Objects.requireNonNull(var10002);
            if (!var10000.validateAndRecord(var10001, var10002::error)) {
                this.fail("Failed to start connector with invalid configuration (see logs for actual errors)");
                return;
            }

            // Instantiate the connector class (here: OracleConnector) via reflection
            SourceConnector connector = null;
            try {
                Class<? extends SourceConnector> connectorClass = this.classLoader.loadClass(connectorClassName);
                connector = (SourceConnector) connectorClass.getDeclaredConstructor().newInstance();
            } catch (Throwable var412) {
                this.fail("Unable to instantiate connector class '" + connectorClassName + "'", var412);
                return;
            }

            // Instantiate, configure, and start the offset backing store (here: FileOffsetBackingStore)
            String offsetStoreClassName = this.config.getString(OFFSET_STORAGE);
            OffsetBackingStore offsetStore = null;
            try {
                Class<? extends OffsetBackingStore> offsetStoreClass = this.classLoader.loadClass(offsetStoreClassName);
                offsetStore = (OffsetBackingStore) offsetStoreClass.getDeclaredConstructor().newInstance();
            } catch (Throwable var406) {
                this.fail("Unable to instantiate OffsetBackingStore class '" + offsetStoreClassName + "'", var406);
                return;
            }

            try {
                offsetStore.configure(this.workerConfig);
                offsetStore.start();
            } catch (Throwable var407) {
                this.fail("Unable to configure and start the '" + offsetStoreClassName + "' offset backing store", var407);
                offsetStore.stop();
                return;
            }

            // Fall back to the configured offset commit policy if none was supplied programmatically
            if (this.offsetCommitPolicy == null) {
                this.offsetCommitPolicy = (OffsetCommitPolicy) Instantiator.getInstanceWithProperties(this.config.getString(OFFSET_COMMIT_POLICY), () -> {
                    return this.getClass().getClassLoader();
                }, this.config.asProperties());
            }

            // Initialize the connector with a minimal context, then start it
            ConnectorContext context = new ConnectorContext() {
                public void requestTaskReconfiguration() {
                }

                public void raiseError(Exception e) {
                    EmbeddedEngine.this.fail(e.getMessage(), e);
                }
            };
            connector.initialize(context);
            OffsetStorageWriter offsetWriter = new OffsetStorageWriter(offsetStore, engineName, this.keyConverter, this.valueConverter);
            final OffsetStorageReader offsetReader = new OffsetStorageReaderImpl(offsetStore, engineName, this.keyConverter, this.valueConverter);
            Duration commitTimeout = Duration.ofMillis(this.config.getLong(OFFSET_COMMIT_TIMEOUT_MS));

            try {
                connector.start(this.workerConfig.originalsStrings());
                connectorCallback.ifPresent(io.debezium.engine.DebeziumEngine.ConnectorCallback::connectorStarted);

                // The embedded engine runs exactly one task; ask the connector for its task class and config
                List<Map<String, String>> taskConfigs = connector.taskConfigs(1);
                Class<? extends Task> taskClass = connector.taskClass();
                if (taskConfigs.isEmpty()) {
                    String msg = "Unable to start connector's task class '" + taskClass.getName() + "' with no task configuration";
                    this.fail(msg);
                    return;
                }

                // Instantiate the task (here: OracleConnectorTask) via reflection
                this.task = null;
                try {
                    this.task = (SourceTask) taskClass.getDeclaredConstructor().newInstance();
                } catch (InstantiationException | IllegalAccessException var408) {
                    this.fail("Unable to instantiate connector's task class '" + taskClass.getName() + "'", var408);
                    return;
                }

                // Initialize and start the task; this is where OracleConnectorTask.start() gets invoked
                try {
                    SourceTaskContext taskContext = new SourceTaskContext() {
                        public OffsetStorageReader offsetStorageReader() {
                            return offsetReader;
                        }

                        public Map<String, String> configs() {
                            return null;
                        }
                    };
                    this.task.initialize(taskContext);
                    this.task.start((Map) taskConfigs.get(0));
                    connectorCallback.ifPresent(io.debezium.engine.DebeziumEngine.ConnectorCallback::taskStarted);
                } catch (Throwable var409) {
                    try {
                        LOGGER.debug("Stopping the task");
                        this.task.stop();
                    } catch (Throwable var400) {
                        LOGGER.info("Error while trying to stop the task");
                    }

                    Configuration config = Configuration.from((Map) taskConfigs.get(0)).withMaskedPasswords();
                    String msg = "Unable to initialize and start connector's task class '" + taskClass.getName() + "' with config: " + config;
                    this.fail(msg, var409);
                    return;
                }

                this.recordsSinceLastCommit = 0L;
                Throwable handlerError = null;
                try {
                    this.timeOfLastCommitMillis = this.clock.currentTimeInMillis();
                    EmbeddedEngine.RecordCommitter committer = this.buildRecordCommitter(offsetWriter, this.task, commitTimeout);

                    // Main loop: poll the task for change records and hand them to the application's handler
                    while (this.runningThread.get() != null) {
                        List changeRecords = null;
                        try {
                            LOGGER.debug("Embedded engine is polling task for records on thread {}", this.runningThread.get());
                            changeRecords = this.task.poll();
                            LOGGER.debug("Embedded engine returned from polling task for records");
                        } catch (InterruptedException var402) {
                            LOGGER.debug("Embedded engine interrupted on thread {} while polling the task for records", this.runningThread.get());
                            if (this.runningThread.get() == Thread.currentThread()) {
                                Thread.currentThread().interrupt();
                            }
                            break;
                        } catch (RetriableException var403) {
                            LOGGER.info("Retrieable exception thrown, connector will be restarted", var403);
                        }

                        try {
                            // Apply the configured single message transforms, dropping filtered records
                            if (changeRecords != null && !changeRecords.isEmpty()) {
                                LOGGER.debug("Received {} records from the task", changeRecords.size());
                                Stream var420 = changeRecords.stream();
                                Transformations var421 = this.transformations;
                                Objects.requireNonNull(var421);
                                changeRecords = (List) var420.map(var421::transform).filter((x) -> {
                                    return x != null;
                                }).collect(Collectors.toList());
                            }

                            if (changeRecords != null && !changeRecords.isEmpty()) {
                                LOGGER.debug("Received {} transformed records from the task", changeRecords.size());
                                try {
                                    // This invokes the consumer we registered via notifying(...)
                                    this.handler.handleBatch(changeRecords, committer);
                                } catch (StopConnectorException var401) {
                                    break;
                                }
                            } else {
                                LOGGER.debug("Received no records from the task");
                            }
                        } catch (Throwable var404) {
                            handlerError = var404;
                            break;
                        }
                    }
                } finally {
                    // Shut down: stop the task, commit the final offsets, and record the outcome
                    if (handlerError != null) {
                        this.fail("Stopping connector after error in the application's handler method: " + handlerError.getMessage(), handlerError);
                    }

                    try {
                        LOGGER.info("Stopping the task and engine");
                        this.task.stop();
                        connectorCallback.ifPresent(io.debezium.engine.DebeziumEngine.ConnectorCallback::taskStopped);
                        this.commitOffsets(offsetWriter, commitTimeout, this.task);
                        if (handlerError == null) {
                            this.succeed("Connector '" + connectorClassName + "' completed normally.");
                        }
                    } catch (InterruptedException var398) {
                        LOGGER.debug("Interrupted while committing offsets");
                        Thread.currentThread().interrupt();
                    } catch (Throwable var399) {
                        this.fail("Error while trying to stop the task and commit the offsets", var399);
                    }
                }
            } catch (Throwable var410) {
                this.fail("Error while trying to run connector class '" + connectorClassName + "'", var410);
            } finally {
                try {
                    offsetStore.stop();
                } catch (Throwable var396) {
                    this.fail("Error while trying to stop the offset store", var396);
                } finally {
                    try {
                        connector.stop();
                        connectorCallback.ifPresent(io.debezium.engine.DebeziumEngine.ConnectorCallback::connectorStopped);
                    } catch (Throwable var395) {
                        this.fail("Error while trying to stop connector class '" + connectorClassName + "'", var395);
                    }
                }
            }
        } finally {
            // Release ownership and hand the final result to the completion callback
            this.latch.countDown();
            this.runningThread.set((Object) null);
            this.completionCallback.handle(this.completionResult.success(), this.completionResult.message(), this.completionResult.error());
        }
    }
}
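Note the connectorCallback.ifPresent(...) hooks sprinkled through run(): the engine reports lifecycle transitions to an optional DebeziumEngine.ConnectorCallback. If you want to observe these stages from the demo program, a minimal sketch looks like this (the println wording is mine; all four methods are optional defaults):

// Minimal sketch: observing the lifecycle stages that EmbeddedEngine.run() drives
DebeziumEngine<ChangeEvent<String, String>> engine = DebeziumEngine
        .create(Json.class)
        .using(props)
        .notifying(record -> System.out.println(record.value()))
        .using(new DebeziumEngine.ConnectorCallback() {
            @Override
            public void connectorStarted() {  // fired after connector.start(...)
                System.out.println("connector started");
            }

            @Override
            public void taskStarted() {       // fired after task.start(...)
                System.out.println("task started");
            }

            @Override
            public void taskStopped() {
                System.out.println("task stopped");
            }

            @Override
            public void connectorStopped() {
                System.out.println("connector stopped");
            }
        })
        .build();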
Next the engine starts the connector's task. Which task class gets created depends on the connector type; here it is io.debezium.connector.oracle.OracleConnectorTask.
Stepping into OracleConnectorTask's start method, the full code is as follows:
@Override
public ChangeEventSourceCoordinator<OraclePartition, OracleOffsetContext> start(Configuration config) {
    OracleConnectorConfig connectorConfig = new OracleConnectorConfig(config);
    TopicSelector<TableId> topicSelector = OracleTopicSelector.defaultSelector(connectorConfig);
    SchemaNameAdjuster schemaNameAdjuster = connectorConfig.schemaNameAdjustmentMode().createAdjuster();

    // Open the JDBC connection and validate the redo log setup
    JdbcConfiguration jdbcConfig = connectorConfig.getJdbcConfig();
    jdbcConnection = new OracleConnection(jdbcConfig, () -> getClass().getClassLoader());
    validateRedoLogConfiguration();

    OracleValueConverters valueConverters = new OracleValueConverters(connectorConfig, jdbcConnection);
    OracleDefaultValueConverter defaultValueConverter = new OracleDefaultValueConverter(valueConverters, jdbcConnection);
    TableNameCaseSensitivity tableNameCaseSensitivity = connectorConfig.getAdapter().getTableNameCaseSensitivity(jdbcConnection);
    this.schema = new OracleDatabaseSchema(connectorConfig, valueConverters, defaultValueConverter, schemaNameAdjuster,
            topicSelector, tableNameCaseSensitivity);

    // Restore offsets persisted by a previous run (via the OffsetStorageReader we saw earlier)
    Offsets<OraclePartition, OracleOffsetContext> previousOffsets = getPreviousOffsets(new OraclePartition.Provider(connectorConfig),
            connectorConfig.getAdapter().getOffsetContextLoader());
    OraclePartition partition = previousOffsets.getTheOnlyPartition();
    OracleOffsetContext previousOffset = previousOffsets.getTheOnlyOffset();

    validateAndLoadDatabaseHistory(connectorConfig, partition, previousOffset, schema);

    taskContext = new OracleTaskContext(connectorConfig, schema);

    Clock clock = Clock.system();

    // Set up the task record queue ...
    this.queue = new ChangeEventQueue.Builder<DataChangeEvent>()
            .pollInterval(connectorConfig.getPollInterval())
            .maxBatchSize(connectorConfig.getMaxBatchSize())
            .maxQueueSize(connectorConfig.getMaxQueueSize())
            .maxQueueSizeInBytes(connectorConfig.getMaxQueueSizeInBytes())
            .loggingContextSupplier(() -> taskContext.configureLoggingContext(CONTEXT_NAME))
            .build();

    errorHandler = new OracleErrorHandler(connectorConfig, queue);

    final OracleEventMetadataProvider metadataProvider = new OracleEventMetadataProvider();

    // The dispatcher turns low-level change events into records on the queue
    EventDispatcher<OraclePartition, TableId> dispatcher = new EventDispatcher<>(
            connectorConfig,
            topicSelector,
            schema,
            queue,
            connectorConfig.getTableFilters().dataCollectionFilter(),
            DataChangeEvent::new,
            metadataProvider,
            new HeartbeatFactory<>(
                    connectorConfig,
                    topicSelector,
                    schemaNameAdjuster,
                    () -> getHeartbeatConnection(connectorConfig, jdbcConfig),
                    exception -> {
                        final String sqlErrorId = exception.getMessage();
                        throw new DebeziumException("Could not execute heartbeat action query (Error: " + sqlErrorId + ")", exception);
                    }),
            schemaNameAdjuster);

    final OracleStreamingChangeEventSourceMetrics streamingMetrics = new OracleStreamingChangeEventSourceMetrics(taskContext, queue, metadataProvider,
            connectorConfig);

    // The coordinator drives the snapshot phase followed by the streaming phase
    ChangeEventSourceCoordinator<OraclePartition, OracleOffsetContext> coordinator = new ChangeEventSourceCoordinator<>(
            previousOffsets,
            errorHandler,
            OracleConnector.class,
            connectorConfig,
            new OracleChangeEventSourceFactory(connectorConfig, jdbcConnection, errorHandler, dispatcher, clock, schema, jdbcConfig, taskContext, streamingMetrics),
            new OracleChangeEventSourceMetricsFactory(streamingMetrics),
            dispatcher,
            schema);

    coordinator.start(taskContext, this.queue, metadataProvider);

    return coordinator;
}
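Everything above feeds into the ChangeEventQueue, so it is worth noting how records get back out. The task's poll() (inherited plumbing from Debezium's BaseSourceTask, shown here condensed and slightly simplified) simply drains this queue and unwraps the SourceRecords that EmbeddedEngine.run() then hands to our handler:

// Condensed/simplified from io.debezium.connector.oracle.OracleConnectorTask.doPoll()
@Override
public List<SourceRecord> doPoll() throws InterruptedException {
    // Blocks until events are available or the configured poll interval elapses
    final List<DataChangeEvent> records = queue.poll();
    return records.stream()
            .map(DataChangeEvent::getRecord)
            .collect(Collectors.toList());
}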
Stepping into coordinator.start, we finally reach the code that actually fetches change data. The coordinator first takes a snapshot of the captured tables, then processes the redo log changes. The streamingSource created here is io.debezium.connector.oracle.logminer.LogMinerStreamingChangeEventSource (which implementation is chosen depends on the database.connection.adapter setting; logminer is the default, while xstream would select the XStream source instead). The coordinator then runs LogMinerStreamingChangeEventSource's execute method, which loops over LogMiner sessions to fetch change records for the captured Oracle tables.
The full code is as follows:
@Override
public void execute(ChangeEventSourceContext context, OraclePartition partition, OracleOffsetContext offsetContext) {
    if (!connectorConfig.getSnapshotMode().shouldStream()) {
        LOGGER.info("Streaming is not enabled in current configuration");
        return;
    }
    try {
        // We explicitly expect auto-commit to be disabled
        jdbcConnection.setAutoCommit(false);

        startScn = offsetContext.getScn();
        snapshotScn = offsetContext.getSnapshotScn();
        Scn firstScn = getFirstScnInLogs(jdbcConnection);

        if (startScn.compareTo(snapshotScn) == 0) {
            // This is the initial run of the streaming change event source.
            // We need to compute the correct start offset for mining. That is not the snapshot offset,
            // but the start offset of the oldest transaction that was still pending when the snapshot
            // was taken.
            computeStartScnForFirstMiningSession(offsetContext, firstScn);
        }

        try (LogWriterFlushStrategy flushStrategy = resolveFlushStrategy()) {
            if (!isContinuousMining && startScn.compareTo(firstScn.subtract(Scn.ONE)) < 0) {
                // startScn is the exclusive lower bound, so must be >= (firstScn - 1)
                throw new DebeziumException(
                        "Online REDO LOG files or archive log files do not contain the offset scn " + startScn + ". Please perform a new snapshot.");
            }

            setNlsSessionParameters(jdbcConnection);
            checkDatabaseAndTableState(jdbcConnection, connectorConfig.getPdbName(), schema);

            try (LogMinerEventProcessor processor = createProcessor(context, partition, offsetContext)) {

                if (archiveLogOnlyMode && !waitForStartScnInArchiveLogs(context, startScn)) {
                    return;
                }

                initializeRedoLogsForMining(jdbcConnection, false, startScn);

                int retryAttempts = 1;
                Stopwatch sw = Stopwatch.accumulating().start();
                while (context.isRunning()) {
                    // Calculate time difference before each mining session to detect time zone offset changes (e.g. DST) on database server
                    streamingMetrics.calculateTimeDifference(getDatabaseSystemTime(jdbcConnection));

                    if (archiveLogOnlyMode && !waitForStartScnInArchiveLogs(context, startScn)) {
                        break;
                    }

                    Instant start = Instant.now();
                    endScn = calculateEndScn(jdbcConnection, startScn, endScn);

                    // This is a small window where when archive log only mode has completely caught up to the last
                    // record in the archive logs that both the start and end values are identical. In this use
                    // case we want to pause and restart the loop waiting for a new archive log before proceeding.
                    if (archiveLogOnlyMode && startScn.equals(endScn)) {
                        pauseBetweenMiningSessions();
                        continue;
                    }

                    flushStrategy.flush(jdbcConnection.getCurrentScn());

                    boolean restartRequired = false;
                    if (connectorConfig.getLogMiningMaximumSession().isPresent()) {
                        final Duration totalDuration = sw.stop().durations().statistics().getTotal();
                        if (totalDuration.toMillis() >= connectorConfig.getLogMiningMaximumSession().get().toMillis()) {
                            LOGGER.info("LogMiner session has exceeded maximum session time of '{}', forcing restart.", connectorConfig.getLogMiningMaximumSession());
                            restartRequired = true;
                        }
                        else {
                            // resume the existing stop watch, we haven't met the criteria yet
                            sw.start();
                        }
                    }

                    if (restartRequired || hasLogSwitchOccurred()) {
                        // This is the way to mitigate PGA leaks.
                        // With one mining session, it grows and maybe there is another way to flush PGA.
                        // At this point we use a new mining session
                        endMiningSession(jdbcConnection, offsetContext);
                        initializeRedoLogsForMining(jdbcConnection, true, startScn);

                        // log switch or restart required, re-create a new stop watch
                        sw = Stopwatch.accumulating().start();
                    }

                    if (context.isRunning()) {
                        if (!startMiningSession(jdbcConnection, startScn, endScn, retryAttempts)) {
                            retryAttempts++;
                        }
                        else {
                            retryAttempts = 1;
                            startScn = processor.process(partition, startScn, endScn);
                            streamingMetrics.setCurrentBatchProcessingTime(Duration.between(start, Instant.now()));
                            captureSessionMemoryStatistics(jdbcConnection);
                        }
                        pauseBetweenMiningSessions();
                    }
                }
            }
        }
    }
    catch (Throwable t) {
        logError(streamingMetrics, "Mining session stopped due to the {}", t);
        errorHandler.setProducerThrowable(t);
    }
    finally {
        LOGGER.info("startScn={}, endScn={}", startScn, endScn);
        LOGGER.info("Streaming metrics dump: {}", streamingMetrics.toString());
        LOGGER.info("Offsets: {}", offsetContext);
    }
}
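To make the mining loop concrete: startMiningSession and processor.process ultimately rest on Oracle's DBMS_LOGMNR package and the V$LOGMNR_CONTENTS view. The following is an illustrative JDBC sketch of one iteration over an SCN window, not the connector's literal statements (which carry many more options and filters); mineOnce and its parameters are hypothetical names:

import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class LogMinerSketch {

    // Illustrative sketch of one mining iteration over (startScn, endScn]
    static void mineOnce(Connection conn, long startScn, long endScn) throws SQLException {
        // Start a LogMiner session over the SCN window; DICT_FROM_ONLINE_CATALOG corresponds
        // to the log.mining.strategy=online_catalog setting used in the demo configuration
        try (CallableStatement start = conn.prepareCall(
                "BEGIN DBMS_LOGMNR.START_LOGMNR("
                        + "STARTSCN => " + startScn + ", "
                        + "ENDSCN => " + endScn + ", "
                        + "OPTIONS => DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG); END;")) {
            start.execute();
        }
        // Read the mined rows; the connector applies table and operation filters at this point
        try (PreparedStatement query = conn.prepareStatement(
                "SELECT SCN, OPERATION_CODE, TABLE_NAME, SQL_REDO FROM V$LOGMNR_CONTENTS WHERE SCN > ? AND SCN <= ?")) {
            query.setLong(1, startScn);
            query.setLong(2, endScn);
            try (ResultSet rs = query.executeQuery()) {
                while (rs.next()) {
                    // each row is a candidate change event (insert/update/delete/commit/...)
                }
            }
        }
        // End the session so the next window starts fresh (mitigating PGA growth, as noted above)
        try (CallableStatement end = conn.prepareCall("BEGIN DBMS_LOGMNR.END_LOGMNR; END;")) {
            end.execute();
        }
    }
}

In the real implementation, the SCN returned by processor.process becomes the next iteration's lower bound, which is exactly the startScn = processor.process(partition, startScn, endScn) line in the loop above.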
That wraps up this pass through the source code.