Syncing data from one Kafka cluster to another in Go

I threw together a simple Go program that copies messages from one Kafka cluster to another. It is a single main.go driven by a YAML config.

package main

import (
   "context"
   "encoding/json"
   "errors"
   "flag"
   "fmt"
   "io"
   "os"
   "os/signal"
   "syscall"
   "time"

   "github.com/panjf2000/ants/v2"
   kafkago "github.com/segmentio/kafka-go"
   "go.uber.org/zap"
   "go.uber.org/zap/zapcore"
   "gopkg.in/natefinch/lumberjack.v2"
   "gopkg.in/yaml.v2"

   "logcopy/config"
   "logcopy/utils/kafka"
)

type Config struct {
   Mode              string              `yaml:"mode"`
   Log               *Log                `yaml:"log"`
   ProcsKafkaConfigs []*ProcsKafkaConfig `yaml:"procConfigs"`
}

type ProcsKafkaConfig struct {
   Name        string        `yaml:"name"`
   KafkaReader *config.Kafka `yaml:"kafka_reader"`
   KafkaWriter *config.Kafka `yaml:"kafka_writer"`
   ProcsCount  int           `yaml:"procs_count"`
   PoolCount   int           `yaml:"pool_count"`
}

type Log struct {
   FileName    string `yaml:"file_name"`
   MaxSize     int    `yaml:"max_size"`
   MaxBackups  int    `yaml:"max_backups"`
   MaxAge      int    `yaml:"max_age"`
   Compress    bool   `yaml:"compress"`
   Level       string `yaml:"level"`
   Development bool   `yaml:"development"`
   Caller      bool   `yaml:"caller"`
}

var (
   confPath = flag.String("config", "config.yaml", "path to the YAML config file")
   ctx      = context.Background()
)

type LogCopyService struct {
   Conf            *Config
   MessageServices []*MessageService
}

type MessageService struct {
   Topics      []string
   KafkaReader *kafka.Reader
   KafkaWriter *kafka.Writer
   WorkerPool  *ants.PoolWithFunc
}

func main() {
   flag.Parse()
   Conf, err := NewConfig(*confPath)
   if err != nil {
      fmt.Printf("init config error: %s\n", err.Error())
      os.Exit(1)
   }
   logger, err := initZapLogger(Conf.Log)
   if err != nil {
      fmt.Printf("init zap logger error: %s\n", err.Error())
      os.Exit(1)
   }

   zap.ReplaceGlobals(logger)

   zap.L().Info("logger Init Success")

   done := make(chan struct{})
   go func() {
      signals := make(chan os.Signal, 1)
      signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
      sig := <-signals
      zap.L().Error("receive signal",
         zap.Any("signal", sig))
      done <- struct{}{}
   }()
   messageService := make([]*MessageService, len(Conf.ProcsKafkaConfigs))
   m := &LogCopyService{Conf: Conf, MessageServices: messageService}
   m.initCopyLog()
   <-done

   m.close()

   if err := logger.Sync(); err != nil {
      fmt.Printf("logger Sync error: %s\n", err.Error())
   }
}

func (m *LogCopyService) initCopyLog() {
   for i, kafkaConfig := range m.Conf.ProcsKafkaConfigs {
      kafkaReaderConf := &config.Kafka{
         Topics:     kafkaConfig.KafkaReader.Topics,
         Brokers:    kafkaConfig.KafkaReader.Brokers,
         GroupId:    kafkaConfig.KafkaReader.GroupId,
         GroupCount: kafkaConfig.KafkaReader.GroupCount,
      }
      KafkaReader, err := kafka.NewReader(kafkaReaderConf)
      if err != nil {
         zap.L().Error("New|Connect Kafka NewReader log copy", zap.String("err", err.Error()))
         continue
      }
      zap.L().Info("New|Connected Kafka NewReader log-copy")
      kafkaWriterConf := &config.Kafka{
         Topics:     kafkaConfig.KafkaWriter.Topics,
         Brokers:    kafkaConfig.KafkaWriter.Brokers,
         GroupId:    kafkaConfig.KafkaWriter.GroupId,
         GroupCount: kafkaConfig.KafkaWriter.GroupCount,
      }
      KafkaWriter, err := kafka.NewWriter(kafkaWriterConf)
      if err != nil {
         zap.L().Error("New|Connect Kafka NewWriter log copy", zap.String("err", err.Error()))
         continue
      }
      messageService := &MessageService{KafkaReader: KafkaReader, KafkaWriter: KafkaWriter, Topics: kafkaConfig.KafkaWriter.Topics}
      workerPool, err := ants.NewPoolWithFunc(kafkaConfig.PoolCount, func(data interface{}) {
         // handle one consumed message
         messageService.processData(data)
      })
      if err != nil {
         zap.L().Error("New|ants.NewPoolWithFunc log copy", zap.String("err", err.Error()))
         continue
      }
      messageService.WorkerPool = workerPool
      m.MessageServices[i] = messageService
      zap.L().Info("New|Connected Kafka NewWriter log-copy")

      for _, topic := range kafkaConfig.KafkaReader.Topics {
         for _, reader := range KafkaReader.Readers[topic] {
            for i := 0; i < kafkaConfig.ProcsCount; i++ {
               go messageService.process(topic, reader)
            }
         }
      }
   }
}

func (s *MessageService) process(topic string, reader *kafkago.Reader) {
   for {
      m, err := reader.ReadMessage(ctx)
      if err != nil {
         if err == io.EOF {
            zap.L().Error("消费DataSource kafka closed|EOF",
               zap.String("topic", topic),
               zap.String("error", err.Error()))
            break
         }
         zap.L().Error("DataSource process|reader.ReadMessage",
            zap.String("topic", topic),
            zap.String("error", err.Error()))
         continue
      }
      msg := m.Value
      var data map[string]interface{}
      if err := json.Unmarshal(msg, &data); err != nil {
         zap.L().Error("json.Unmarshal error",
            zap.String("error", err.Error()))
         continue
      }
      if err := s.WorkerPool.Invoke(data); err != nil {
         zap.L().Error("process alarm rule error",
            zap.String("error", err.Error()))
      }
   }
}

func (s *MessageService) processData(msg interface{}) {
   data, ok := msg.(map[string]interface{})
   if !ok {
      zap.L().Error("processData: unexpected message type")
      return
   }
   // messages carrying a "data" array are fanned out record by record,
   // anything else is forwarded as a single message
   records := []interface{}{msg}
   if dataArr, ok := data["data"].([]interface{}); ok {
      records = dataArr
   }
   for _, d := range records {
      jsonData, err := json.Marshal(d)
      if err != nil {
         zap.L().Error("json.Marshal error", zap.String("error", err.Error()))
         continue
      }
      for _, topic := range s.Topics {
         if err := s.KafkaWriter.Process(jsonData, topic); err != nil {
            zap.L().Error("send kafka topic error", zap.String("error", err.Error()))
         }
      }
   }
}

func NewConfig(filePath string) (*Config, error) {
   if filePath == "" {
      return nil, fmt.Errorf("file path is empty\n")
   }

   confBytes, err := os.ReadFile(filePath)
   if err != nil {
      return nil, fmt.Errorf("os.ReadFile(%s) error [%s]", filePath, err.Error())
   }

   conf := &Config{}
   if err := yaml.Unmarshal(confBytes, conf); err != nil {
      fmt.Println("yaml.Unmarshal error [%s]", err.Error())
      return nil, fmt.Errorf("yaml.Unmarshal error [%s]", err.Error())
   }

   return conf, nil
}

func (m *LogCopyService) close() {
   zap.L().Info("closing kafka consumers...")
   for _, service := range m.MessageServices {
      if service == nil {
         continue
      }
      service.KafkaReader.Close()
   }
   // let in-flight worker-pool tasks drain before closing the writers
   for _, service := range m.MessageServices {
      if service == nil {
         continue
      }
      service.WorkerPool.Release()
      for i := 0; service.WorkerPool.Running() > 0 && i < 2; i++ {
         time.Sleep(time.Second * 2)
      }
   }
   zap.L().Info("closing kafka producers...")
   for _, service := range m.MessageServices {
      if service == nil {
         continue
      }
      service.KafkaWriter.Close()
   }
}

func initZapLogger(conf *Log) (*zap.Logger, error) {
   if conf == nil {
      return nil, errors.New("newLogger error, logger conf is nil")
   }

   hook := lumberjack.Logger{
      Filename:   conf.FileName,   // log file path
      MaxSize:    conf.MaxSize,    // max size of a single log file, in MB
      MaxBackups: conf.MaxBackups, // how many rotated files to keep
      MaxAge:     conf.MaxAge,     // how many days to keep old files
      Compress:   conf.Compress,   // whether to compress rotated files
   }

   encoderConfig := zapcore.EncoderConfig{
      TimeKey:        "time",
      LevelKey:       "level",
      NameKey:        "logger",
      CallerKey:      "line",
      MessageKey:     "msg",
      StacktraceKey:  "stacktrace",
      LineEnding:     zapcore.DefaultLineEnding,
      EncodeLevel:    zapcore.LowercaseLevelEncoder,  // lowercase level names
      EncodeTime:     zapcore.ISO8601TimeEncoder,     // ISO8601 timestamps
      EncodeDuration: zapcore.SecondsDurationEncoder,
      EncodeCaller:   zapcore.FullCallerEncoder,      // full caller paths
      EncodeName:     zapcore.FullNameEncoder,
   }

   // set the log level from the config, falling back to info
   atomicLevel := zap.NewAtomicLevelAt(zap.InfoLevel)
   if conf.Level != "" {
      if err := atomicLevel.UnmarshalText([]byte(conf.Level)); err != nil {
         return nil, fmt.Errorf("invalid log level %q: %s", conf.Level, err.Error())
      }
   }

   logger := zap.New(zapcore.NewCore(
      zapcore.NewJSONEncoder(encoderConfig),
      zapcore.AddSync(&hook), // write to the rotating log file
      atomicLevel,
   ))

   return logger, nil
}
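
main.go leans on two small helper packages, logcopy/config and logcopy/utils/kafka, that aren't shown in the post. To make the listing easier to follow, here is a minimal sketch of what config.Kafka might look like, inferred from how main.go and the YAML below use it; the exact field set and tags in the real package may differ (the kafka wrapper is sketched after the config):

package config

// Kafka describes one cluster endpoint, shared by the reader and writer side.
// Field names are taken from main.go; the yaml tags are assumptions that match
// the example config below.
type Kafka struct {
   Topics     []string `yaml:"topics"`
   Brokers    []string `yaml:"brokers"`
   GroupId    string   `yaml:"group_id"`
   GroupCount int      `yaml:"group_count"` // how many readers to open per topic (assumed)
}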
mode: "local"
log:
  file_name: "./logs/server.log"
  max_size: 500
  max_backups: 50
  max_age: 7
  compress: true
  level: "info"
procConfigs:
  - name: "kafka1"
    procs_count: 3
    pool_count: 100
    kafka_writer:
      topics:
        - "kafka1-topic1"
      brokers:
        - "ip:port"
      group_id:
    kafka_reader:
      topics:
        - "kafka1-topic2"
      brokers:
        - "ip:port"
        - "ip:port"
        - "ip:port2"
      group_id: "kafka1-topic2-group"
      group_count: 3
  - name: "kafka2"
    procs_count: 3
    pool_count: 100
    kafka_writer:
      topics:
        - "kafka2-topic1"
      brokers:
        - "ip:port"
      group_id:
    kafka_reader:
      topics:
        - "kafka2-topic2"
      brokers:
        - "ip:port"
      group_id: "kafka2-topic2-group"
      group_count: 3
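
The logcopy/utils/kafka wrapper is not shown in the post either. Below is a rough sketch of how such a wrapper could be built on segmentio/kafka-go, matching only the names main.go actually calls (Reader.Readers, Writer.Process, the Close methods); the reader options, per-topic writer layout, balancer, and error handling are all assumptions rather than the original code:

package kafka

import (
   "context"

   kafkago "github.com/segmentio/kafka-go"

   "logcopy/config"
)

// Reader opens GroupCount kafka-go readers per topic, all in the same consumer group.
type Reader struct {
   Readers map[string][]*kafkago.Reader
}

func NewReader(conf *config.Kafka) (*Reader, error) {
   r := &Reader{Readers: make(map[string][]*kafkago.Reader)}
   for _, topic := range conf.Topics {
      for i := 0; i < conf.GroupCount; i++ {
         r.Readers[topic] = append(r.Readers[topic], kafkago.NewReader(kafkago.ReaderConfig{
            Brokers: conf.Brokers,
            GroupID: conf.GroupId,
            Topic:   topic,
         }))
      }
   }
   return r, nil
}

func (r *Reader) Close() {
   for _, readers := range r.Readers {
      for _, reader := range readers {
         _ = reader.Close()
      }
   }
}

// Writer keeps one kafka-go writer per destination topic.
type Writer struct {
   writers map[string]*kafkago.Writer
}

func NewWriter(conf *config.Kafka) (*Writer, error) {
   w := &Writer{writers: make(map[string]*kafkago.Writer)}
   for _, topic := range conf.Topics {
      w.writers[topic] = &kafkago.Writer{
         Addr:     kafkago.TCP(conf.Brokers...),
         Topic:    topic,
         Balancer: &kafkago.LeastBytes{},
      }
   }
   return w, nil
}

// Process sends a single message to the given destination topic.
func (w *Writer) Process(data []byte, topic string) error {
   return w.writers[topic].WriteMessages(context.Background(), kafkago.Message{Value: data})
}

func (w *Writer) Close() {
   for _, writer := range w.writers {
      _ = writer.Close()
   }
}

If GroupCount works as sketched here, each source topic ends up being consumed by group_count × procs_count goroutines, which is where the parallelism knobs in the config come from. Running the program is then just a matter of building it and starting the binary with -config config.yaml (the binary name depends on the module path, assumed here to be logcopy).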
