TiCDC (tidb-cdc) replication to Kafka fails with CDC:ErrKafkaNewSaramaProducer

1. Symptoms

When creating a changefeed with cdc cli, the command either hangs or fails with [CDC:ErrKafkaNewSaramaProducer] kafka: client has run out of available brokers to talk to (Is your cluster reachable?)

2. Possible causes

  1. kafka-version is not specified in the sink-uri (for example kafka://kafka_ip:9092/topic-name?kafka-version=2.4.0)
  2. Kafka has SASL authentication enabled, which CDC does not support: https://github.com/pingcap/tiflow/issues/1106
  3. The CDC version is not compatible with the Kafka version in use
  4. listeners in server.properties must be configured and must not use localhost: listeners=PLAINTEXT://kafka_ip:9092
  5. The CDC servers cannot reach port 9092 on the Kafka brokers
  6. The machine running cdc-ctl cannot reach port 9092 on the Kafka brokers (a quick reachability check is sketched right after this list)
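
For causes 5 and 6, a plain TCP check run from both the CDC servers and the cdc-ctl machine is enough to confirm basic reachability. This is a minimal sketch using only the Go standard library; kafka_ip:9092 is a placeholder for the address from the listeners setting.

package main

import (
	"fmt"
	"net"
	"time"
)

func main() {
	// Placeholder broker address; replace with the value from listeners=PLAINTEXT://kafka_ip:9092.
	addr := "kafka_ip:9092"

	// Try to open a plain TCP connection with a short timeout.
	conn, err := net.DialTimeout("tcp", addr, 3*time.Second)
	if err != nil {
		fmt.Printf("cannot reach %s from this machine: %v\n", addr, err)
		return
	}
	defer conn.Close()
	fmt.Printf("%s is reachable from this machine\n", addr)
}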

3. Troubleshooting

1. kafka-version was specified and SASL was not enabled on Kafka, so causes 1 and 2 are ruled out.
2. The same error appeared with several versions of Kafka and several TiDB clusters, so cause 3 is ruled out.
3. The listeners setting had already been fixed and Kafka port 9092 was open to the CDC servers, so causes 4 and 5 are ruled out.
4. By chance, Kafka was deployed on one of the machines of the TiDB cluster, and from there the changefeed could be created normally, so the problem had to be port access. But port 9092 was already open to the CDC servers, so what else could it be? Brute-force testing of the remaining combinations finally showed that cdc-ctl also needs to reach Kafka on port 9092 when it creates a changefeed.

4. Conclusion

When TiCDC replicates to Kafka, port 9092 on the Kafka brokers must be open not only to the CDC servers but also to the machine where cdc-ctl is run.
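
A check that is closer to what cdc-ctl actually does (see the source walkthrough below) is to open a Kafka admin connection directly. This is a minimal sketch assuming the Shopify/sarama client; the broker address and version string are placeholders and should match the real cluster. Run it from both the CDC servers and the cdc-ctl machine.

package main

import (
	"fmt"

	"github.com/Shopify/sarama"
)

func main() {
	cfg := sarama.NewConfig()

	// Placeholder Kafka version; it plays the same role as the kafka-version sink-uri parameter.
	version, err := sarama.ParseKafkaVersion("2.4.0")
	if err != nil {
		panic(err)
	}
	cfg.Version = version

	// NewClusterAdmin dials the brokers immediately; if port 9092 is not reachable from
	// this machine, the error is "client has run out of available brokers to talk to".
	admin, err := sarama.NewClusterAdmin([]string{"kafka_ip:9092"}, cfg)
	if err != nil {
		fmt.Printf("admin connection failed: %v\n", err)
		return
	}
	defer admin.Close()

	topics, err := admin.ListTopics()
	if err != nil {
		fmt.Printf("ListTopics failed: %v\n", err)
		return
	}
	fmt.Printf("connected, %d topics visible\n", len(topics))
}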

No need to read further for the fix itself; the walkthrough below is still a rough draft and the reasoning has not been fully sorted out yet.

5. Understanding the source code

1. Why read the source
Since it was baffling why ctl needs to reach Kafka at all, the next step was to look for the answer in the source code and see whether the reason could be found there.

2. Walking through the call path
2.1 func newCmdCreateChangefeed: complete, validate, run -- the entry point of cli changefeed create

2.2 validate / validateSink -- the sink-uri is handed to sink.Validate

2.3 sink.Validate -- builds a filter (filter.NewFilter) and constructs the sink from the sink-uri

2.4 sink.New -- parses the sink-uri and dispatches on its scheme through sinkIniterMap; kafka:// is mapped to mq.NewKafkaSaramaSink

2.5 adminClient inside NewKafkaSaramaSink -- used to open a new Kafka connection

2.6 var NewAdminClientImpl kafka.ClusterAdminClientCreator = kafka.NewSaramaAdminClient

2.7 NewSaramaAdminClient opens the Kafka connection

https://github.com/pingcap/tiflow/blob/bc1c72da525844c7940ce2f33b5acd092b0ff713/pkg/cmd/cli/cli_changefeed_create.go

// newCmdCreateChangefeed creates the `cli changefeed create` command.
func newCmdCreateChangefeed(f factory.Factory) *cobra.Command {
	commonChangefeedOptions := newChangefeedCommonOptions()

	o := newCreateChangefeedOptions(commonChangefeedOptions)

	command := &cobra.Command{
		Use:   "create",
		Short: "Create a new replication task (changefeed)",
		Args:  cobra.NoArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmdcontext.GetDefaultContext()

			err := o.complete(ctx, f, cmd)
			if err != nil {
				return err
			}

			err = o.validate(ctx, cmd)
			if err != nil {
				return err
			}

			return o.run(ctx, cmd)
		},
	}

	o.addFlags(command)

	return command
}



// run the `cli changefeed create` command.
func (o *createChangefeedOptions) run(ctx context.Context, cmd *cobra.Command) error {
	id := o.changefeedID
	if id == "" {
		id = uuid.New().String()
	}
	// ...

// validate checks that the provided attach options are specified.
func (o *createChangefeedOptions) validate(ctx context.Context, cmd *cobra.Command) error {
	if o.commonChangefeedOptions.sinkURI == "" {
		return errors.New("Creating changefeed without a sink-uri")
	}
	// ...


// complete adapts from the command line args to the data and client required.
func (o *createChangefeedOptions) complete(ctx context.Context, f factory.Factory, cmd *cobra.Command) error {
	etcdClient, err := f.EtcdClient()
	if err != nil {
		return err
	}
	// ...

// validateSink will create a sink and verify that the configuration is correct.
func (o *createChangefeedOptions) validateSink(
	ctx context.Context, cfg *config.ReplicaConfig, opts map[string]string,
) error {
	return sink.Validate(ctx, o.commonChangefeedOptions.sinkURI, cfg, opts)
}
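
This is the crucial point for the conclusion above: validateSink runs inside the cli process, so the sink for the given sink-uri (including the Kafka connection behind it) is built on the machine where cdc-ctl is executed. A minimal sketch of what that validation amounts to, assuming the package paths from the links in this section; the sink-uri is a placeholder:

import (
	"context"

	"github.com/pingcap/tiflow/cdc/sink"
	"github.com/pingcap/tiflow/pkg/config"
)

// Not taken from the repository: a sketch that validates a kafka sink-uri the same way
// `cdc cli changefeed create` does, which means dialing the brokers from this process.
func checkKafkaSinkURI(ctx context.Context) error {
	cfg := config.GetDefaultReplicaConfig()
	sinkURI := "kafka://kafka_ip:9092/test-topic?kafka-version=2.4.0"
	return sink.Validate(ctx, sinkURI, cfg, map[string]string{})
}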

https://github.com/pingcap/tiflow/blob/da440ec90c4b63b07d0d6e85d579451187e5537c/pkg/cmd/factory/factory.go

// Factory defines the client-side construction factory.
type Factory interface {
	ClientGetter
	EtcdClient() (*etcd.CDCEtcdClient, error)
	PdClient() (pd.Client, error)
	KvStorage() (kv.Storage, error)
}

https://github.com/pingcap/tiflow/blob/20f4adecffcd0e3ae1c78799ca19833d9d6842f5/pkg/config/replica_config.go

// GetDefaultReplicaConfig returns the default replica config.
func GetDefaultReplicaConfig() *ReplicaConfig {
	return defaultReplicaConfig.Clone()
}

type replicaConfig struct {
	CaseSensitive    bool              `toml:"case-sensitive" json:"case-sensitive"`
	EnableOldValue   bool              `toml:"enable-old-value" json:"enable-old-value"`
	ForceReplicate   bool              `toml:"force-replicate" json:"force-replicate"`
	CheckGCSafePoint bool              `toml:"check-gc-safe-point" json:"check-gc-safe-point"`
	Filter           *FilterConfig     `toml:"filter" json:"filter"`
	Mounter          *MounterConfig    `toml:"mounter" json:"mounter"`
	Sink             *SinkConfig       `toml:"sink" json:"sink"`
	Consistent       *ConsistentConfig `toml:"consistent" json:"consistent"`
}

https://github.com/pingcap/tiflow/blob/20f4adecffcd0e3ae1c78799ca19833d9d6842f5/pkg/config/sink.go

// SinkConfig represents sink config for a changefeed
type SinkConfig struct {
	DispatchRules   []*DispatchRule   `toml:"dispatchers" json:"dispatchers"`
	Protocol        string            `toml:"protocol" json:"protocol"`
	ColumnSelectors []*ColumnSelector `toml:"column-selectors" json:"column-selectors"`
	SchemaRegistry  string            `toml:"schema-registry" json:"schema-registry"`
}
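
For reference, this struct is what the [sink] section of a changefeed configuration file is decoded into (the toml tags above give the field names); the protocol can also be passed as a sink-uri query parameter. A small sketch, assuming the default replica config already carries a non-nil Sink; "canal-json" is just an example protocol value:

cfg := config.GetDefaultReplicaConfig()
// Fills the `protocol` field of SinkConfig, i.e. protocol = "canal-json" in the [sink] section.
cfg.Sink.Protocol = "canal-json"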

https://github.com/pingcap/tiflow/blob/dd41f0f1b0335991b0c7afd33171c665528bd7ac/cdc/sink/sink.go

// Validate sink if given valid parameters.
func Validate(ctx context.Context, sinkURI string, cfg *config.ReplicaConfig, opts map[string]string) error {
	sinkFilter, err := filter.NewFilter(cfg)
	if err != nil {
		return err
	}
	// ...

// New creates a new sink with the sink-uri
func New(
	ctx context.Context, changefeedID model.ChangeFeedID, sinkURIStr string,
	filter *filter.Filter, config *config.ReplicaConfig, opts map[string]string,
	errCh chan error,
) (Sink, error) {
	// parse sinkURI as a URI
	sinkURI, err := url.Parse(sinkURIStr)
	if err != nil {
		return nil, cerror.WrapError(cerror.ErrSinkURIInvalid, err)
	}
	if newSink, ok := sinkIniterMap[strings.ToLower(sinkURI.Scheme)]; ok {
		return newSink(ctx, changefeedID, sinkURI, filter, config, opts, errCh)
	}
	return nil, cerror.ErrSinkURIInvalid.GenWithStack("the sink scheme (%s) is not supported", sinkURI.Scheme)
}


func init() {
	// register blackhole sink
	sinkIniterMap["blackhole"] = func(
		ctx context.Context, changefeedID model.ChangeFeedID, sinkURI *url.URL,
		filter *filter.Filter, config *config.ReplicaConfig, opts map[string]string,
		errCh chan error,
	) (Sink, error) {
		return newBlackHoleSink(ctx), nil
	}
	// register kafka sink
	sinkIniterMap["kafka"] = func(
		ctx context.Context, changefeedID model.ChangeFeedID, sinkURI *url.URL,
		filter *filter.Filter, config *config.ReplicaConfig, opts map[string]string,
		errCh chan error,
	) (Sink, error) {
		return mq.NewKafkaSaramaSink(ctx, sinkURI, filter, config, opts, errCh)
	}
	sinkIniterMap["kafka+ssl"] = sinkIniterMap["kafka"]
	// ...
}
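
The lookup key into sinkIniterMap is the scheme of the sink-uri, which is why a kafka:// address ends up in mq.NewKafkaSaramaSink. A standard-library-only sketch of that dispatch decision; the sink-uri is a placeholder:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	sinkURI, err := url.Parse("kafka://kafka_ip:9092/test-topic?kafka-version=2.4.0")
	if err != nil {
		panic(err)
	}
	// sink.New lowercases the scheme and looks it up in sinkIniterMap;
	// "kafka" (and "kafka+ssl") map to the Kafka sarama sink initializer.
	fmt.Println(strings.ToLower(sinkURI.Scheme)) // prints: kafka
}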

https://github.com/pingcap/tiflow/blob/dd41f0f1b0335991b0c7afd33171c665528bd7ac/pkg/filter/filter.go

// NewFilter creates a filter.
func NewFilter(cfg *config.ReplicaConfig) (*Filter, error) {
	f, err := VerifyRules(cfg)
	if err != nil {
		return nil, cerror.WrapError(cerror.ErrFilterRuleInvalid, err)
	}
	// ...

https://github.com/pingcap/tiflow/blob/8e8fddb046754f148d76a3b263e33c2d06d38e44/cdc/owner/changefeed.go

type changefeed struct {
	// ... (other fields omitted)
	newSink func() DDLSink
}

https://github.com/pingcap/tiflow/blob/0b7969deea495ff6462b9a980a5717cca2fbcec5/cdc/sink/mq/mq.go

// NewKafkaSaramaSink creates a new Kafka mqSink.
func NewKafkaSaramaSink(ctx context.Context, sinkURI *url.URL,
	replicaConfig *config.ReplicaConfig,
	errCh chan error,
) (*mqSink, error) {
	topic := strings.TrimFunc(sinkURI.Path, func(r rune) bool {
		return r == '/'
	})
	if topic == "" {
		return nil, cerror.ErrKafkaInvalidConfig.GenWithStack("no topic is specified in sink-uri")
	}
	
	// ... (baseConfig and saramaConfig are derived from the sink-uri; omitted in this excerpt)
	adminClient, err := kafka.NewAdminClientImpl(baseConfig.BrokerEndpoints, saramaConfig)
	if err != nil {
		// This wrap produces the [CDC:ErrKafkaNewSaramaProducer] seen in the symptom section.
		return nil, cerror.WrapError(cerror.ErrKafkaNewSaramaProducer, err)
	}
	// ...


https://github.com/pingcap/tiflow/blob/dd41f0f1b0335991b0c7afd33171c665528bd7ac/cdc/sink/mq/producer/kafka/kafka.go

// NewAdminClientImpl specifies the build method for the admin client.
var NewAdminClientImpl kafka.ClusterAdminClientCreator = kafka.NewSaramaAdminClient

https://github.com/pingcap/tiflow/blob/7fa1f2fb33e3685c3a45d27e5786f3075ae9fa41/pkg/kafka/cluster_admin_client.go

// NewSaramaAdminClient constructs a ClusterAdminClient with sarama.
func NewSaramaAdminClient(addrs []string, conf *sarama.Config) (ClusterAdminClient, error) {
	return sarama.NewClusterAdmin(addrs, conf)
}
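
Putting it together: creating a changefeed validates the sink inside the cdc-ctl process, the kafka sink builds a cluster admin client, and sarama.NewClusterAdmin dials the brokers right away. The inner message in the symptom section is sarama's ErrOutOfBrokers sentinel, wrapped by TiCDC as [CDC:ErrKafkaNewSaramaProducer], which is exactly what happens when the cdc-ctl machine cannot reach port 9092. A small sketch of recognizing that inner error, assuming sarama returns it unwrapped; brokers and cfg are supplied by the caller:

import (
	"errors"
	"fmt"

	"github.com/Shopify/sarama"
)

// A sketch: sarama.ErrOutOfBrokers carries the exact message quoted in the symptom section.
func explainBrokerError(brokers []string, cfg *sarama.Config) {
	admin, err := sarama.NewClusterAdmin(brokers, cfg)
	if err != nil {
		if errors.Is(err, sarama.ErrOutOfBrokers) {
			// No broker was reachable from this machine; inside `cdc cli changefeed create`,
			// "this machine" is the cdc-ctl host.
			fmt.Println("no reachable Kafka broker from this host; check access to port 9092")
		} else {
			fmt.Printf("admin client error: %v\n", err)
		}
		return
	}
	_ = admin.Close()
	fmt.Println("broker connection OK")
}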