说明
目的:帮助学习使用 github.com/segmentio/kafka-go 库
code目录
./adapter
|_ kafka
|_ kafka.go
./configs
|_ kafka.go
producer.go
consumer.go
./configs/kafka.go
package configs
// KafkaBrokers lists the bootstrap brokers this demo may dial.
// Each entry carries the broker's display name, dial address and network.
// Uncomment the extra entries below to run against a multi-broker cluster.
var KafkaBrokers = []map[string]string{
{
"name": "broker1",
"addr": "localhost:9092",
"network": "tcp",
},
// {
// "name": "broker2",
// "addr": "localhost:9093",
// "network": "tcp",
// },
// {
// "name": "broker3",
// "addr": "localhost:9094",
// "network": "tcp",
// },
}
./adapter/kafka/kafka.go
package kafka
import (
"context"
"errors"
"strconv"
"tests/kafka/configs"
"time"
"github.com/segmentio/kafka-go"
)
var (
	// curControllerConn caches the most recently resolved controller
	// connection (assigned by ConnectController). Declared as a nil
	// pointer: the original `new(kafka.Conn)` allocated a zero-value
	// Conn that was never usable and only masked "not yet connected".
	curControllerConn *kafka.Conn
	// holdingConnections caches dialed connections by key (see
	// combineConnKey). NOTE(review): plain map — not safe for
	// concurrent use; guard with a mutex if called from goroutines.
	holdingConnections = map[string]*kafka.Conn{}
)
// 取出一个普通连接。如果存在则复用
func Connect(network string, addr string) (*kafka.Conn, error) {
if network == "" || addr == "" {
return nil, errors.New("network or addr connot be empty")
}
key := combineConnKey(network, addr, "", 0)
if conn, ok := holdingConnections[key]; ok {
return conn, nil
}
conn, err := kafka.Dial(network, addr)
if err != nil {
return nil, err
}
conn.SetReadDeadline(time.Now().Add(5 * time.Second))
holdingConnections[key] = conn
return conn, nil
}
// 查询并连接到topic的partition的实际leader。 addr仅用于查询
func ConnectPartitionLeader(network string, addr string, topic string, partition int) (*kafka.Conn, error) {
if network == "" || addr == "" {
return nil, errors.New("Broker's network and addr connot be empty")
}
if topic == "" || partition < 0 {
return nil, errors.New("Topic or partition is invalid")
}
// 此方法传入的的address地址仅用于查询leader,最终连接不一定是连到addr
conn, err := kafka.DialLeader(
context.Background(),
network,
addr,
topic,
partition,
)
return conn, err
}
// 连接到 topic/partition的leader。
// 如果连接存在则复用
// 应用场景: 当topic的消息对顺序无强制要求时,可建立多个partition,每个partition单独连接(从leader副本读写),提高吞吐量
// ConnectLeader connects to the leader of topic/partition, picking a
// bootstrap broker from the configured list by a time-based rotation.
// Use case: when message ordering is not required for a topic, multiple
// partitions can each get their own leader connection to raise throughput.
func ConnectLeader(topic string, partition int) (*kafka.Conn, error) {
	brokers := configs.KafkaBrokers
	pick := brokers[time.Now().UnixMilli()%int64(len(brokers))]
	return ConnectPartitionLeader(pick["network"], pick["addr"], topic, partition)
}
// 根据conn取得controller连接。如果存在则复用
func ConnectController(conn *kafka.Conn) (*kafka.Conn, error) {
ctrl, err := conn.Controller()
if err != nil {
return nil, err
}
ctrlConn, err := Connect("tcp", ctrl.Host+":"+strconv.Itoa(ctrl.Port))
if err != nil {
return nil, err
}
curControllerConn = ctrlConn
return ctrlConn, nil
}
// GetWriter builds a Writer that targets all configured brokers for the
// given topic. Balancing uses LeastBytes; automatic topic creation is
// disabled, so the topic must already exist.
func GetWriter(topic string) *kafka.Writer {
	brokerAddrs := make([]string, 0, len(configs.KafkaBrokers))
	for _, b := range configs.KafkaBrokers {
		brokerAddrs = append(brokerAddrs, b["addr"])
	}
	w := &kafka.Writer{
		Addr:                   kafka.TCP(brokerAddrs...),
		Topic:                  topic,
		AllowAutoTopicCreation: false,
		Balancer:               &kafka.LeastBytes{},
	}
	return w
}
// 获取topic的reader,连接到所有分区。
// 指定partition 和 指定 groupID是两种不同模式,partition固定从某个分区leader读取,groupID从所有分区leader读取
// GetReader returns a Reader for topic over all configured brokers.
// partition and groupID select two mutually exclusive modes: a fixed
// partition reads from that partition's leader only, while a consumer
// group reads from all partitions with broker-managed assignment.
func GetReader(topic string, partition int, groupID string) *kafka.Reader {
	addrs := make([]string, len(configs.KafkaBrokers))
	for i, broker := range configs.KafkaBrokers {
		addrs[i] = broker["addr"]
	}
	// kafka-go's NewReader panics when both GroupID and a non-zero
	// Partition are set; in group mode the broker owns partition
	// assignment, so drop the explicit partition.
	if groupID != "" {
		partition = 0
	}
	return kafka.NewReader(
		kafka.ReaderConfig{
			Brokers:        addrs,
			Topic:          topic,
			Partition:      partition,
			GroupID:        groupID,
			MaxBytes:       10e6, // ~10 MB per fetch
			CommitInterval: 500 * time.Millisecond,
		},
	)
}
// 关闭所有连接
func CloseAll() {
for k, conn := range holdingConnections {
conn.Close()
delete(holdingConnections, k)
}
}
// combineConnKey builds the connection-cache key in the form
// "network://addr@topic@partition".
func combineConnKey(network string, addr string, topic string, partition int) string {
	key := network + "://" + addr
	key += "@" + topic
	key += "@" + strconv.Itoa(partition)
	return key
}
./producer.go
package main
import (
"context"
"flag"
"fmt"
"log"
"math/rand"
"strconv"
"strings"
"time"
"github.com/segmentio/kafka-go"
kafkaConns "tests/kafka/adapter/kafka"
)
var random = rand.New(rand.NewSource(time.Now().UnixNano()))
// main writes -n messages to -topic in random batches sized within the
// -c "min~max" range, sleeping -i between batches.
func main() {
	topic := flag.String("topic", "", "Specify the topic")
	// partition := flag.Int("partition", 0, "Specify the partition")
	total := flag.Int("n", 100, "Total Amount of messages to write")
	concurrent := flag.String("c", "1~1", "Concurrent range of messages to write, e.g.: 2, 3~10")
	interval := flag.Duration("i", time.Millisecond*500, "Writing interval, e.g.: 100ms, 1s")
	flag.Parse()
	// Log the dereferenced values; the original also printed the raw
	// *string/*Duration pointers, which is noise.
	log.Println("参数: ", *topic, *total, *concurrent, *interval)
	if *topic == "" {
		// log.Fatal exits; a trailing return would be unreachable.
		log.Fatal("Param -topic cannot be empty!")
	}
	concMin, concMax := parseParamConcurrent(*concurrent, "~")
	connWriter := kafkaConns.GetWriter(*topic)
	defer connWriter.Close()
	totalWrote := 0
	// Reused across iterations; sized for the largest possible batch.
	msgBatch := make([]kafka.Message, concMax)
	for totalWrote < *total {
		batchSize := concMin + IfInt(concMax-concMin >= 1, random.Intn(concMax-concMin+1), 0)
		batchSize = IfInt(batchSize > *total-totalWrote, *total-totalWrote, batchSize)
		for i := 0; i < batchSize; i++ {
			msgBatch[i] = kafka.Message{Value: []byte(fmt.Sprintf("Message %d", totalWrote+i))}
		}
		// Per-batch timeout; call cancel right after the write so the
		// context's timer is released (go vet: lostcancel).
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		err := connWriter.WriteMessages(ctx, msgBatch[0:batchSize]...)
		cancel()
		if err != nil {
			log.Println("Failed to write messages:", err)
		} else {
			totalWrote += batchSize
			log.Printf("Wrote to topic: %s, Completed: %d/%d\n", *topic, totalWrote, *total)
		}
		time.Sleep(*interval)
	}
	// Println already appends a newline; the embedded \n doubled it.
	fmt.Println("============= Done! ============")
}
// parseParamConcurrent parses a "min<sep>max" range string into a pair
// of ints. Missing or unparsable parts fall back so that the result
// always satisfies min >= 1 and max >= min.
func parseParamConcurrent(paramConcurrent string, sep string) (int, int) {
	lowStr, highStr, _ := strings.Cut(paramConcurrent, sep)
	// Parse failures leave the value at 0 and are normalized below.
	low, _ := strconv.ParseInt(lowStr, 10, 32)
	high, _ := strconv.ParseInt(highStr, 10, 32)
	if low < 1 {
		low = 1
	}
	if high < low {
		high = low
	}
	return int(low), int(high)
}
// IfInt is a ternary helper for ints: it yields trueVal when condition
// holds and falseVal otherwise.
func IfInt(condition bool, trueVal, falseVal int) int {
	result := falseVal
	if condition {
		result = trueVal
	}
	return result
}
./consumer.go
package main
import (
"context"
"flag"
"fmt"
"log"
"time"
"github.com/segmentio/kafka-go"
kafkaConns "tests/lib/kafka/adapter/kafka"
)
// main consumes -topic either from an explicit -partition (resuming at
// -offset) or as a member of -group, handling up to -c messages per
// batch and sleeping -i between batches.
func main() {
	topic := flag.String("topic", "", "Specify the topic")
	partition := flag.Int("partition", 0, "Specify the partition, not necessary if group is presented")
	partitionOffset := flag.Int64("offset", 0, "Specify an offset to continue task if consume by partition")
	consumerGroup := flag.String("group", "", "Specify the consumer group. ")
	// total := flag.Int("n", 0, "Total Amount of messages to read")
	concurrent := flag.Int("c", 1, "The batch size of dealing with messages, greater than 1 means merging multiple messages to deal with at once")
	interval := flag.Duration("i", time.Millisecond*500, "Polling interval, e.g.: 100ms, 1s")
	flag.Parse()
	partitionStr := IfString(*consumerGroup == "", fmt.Sprintf("%d", *partition), "--")
	log.Printf("参数: topic:%s, partition:%s, group:%s, interval:%s\n", *topic, partitionStr, *consumerGroup, *interval)
	if *topic == "" {
		log.Fatal("Param -topic cannot be empty!")
	}
	if *consumerGroup == "" && *partition < 0 {
		log.Fatal("Either -group or -partition should be provided!")
	}
	connReader := kafkaConns.GetReader(*topic, *partition, *consumerGroup)
	defer connReader.Close()
	// When consuming by partition (no group), the offset is tracked by
	// the caller, so seek to the requested starting offset here.
	if *consumerGroup == "" {
		connReader.SetOffset(*partitionOffset)
	}
	totalConsumed := 0
	ctx := context.Background()
	batch := make([]kafka.Message, 0, *concurrent)
	for {
		// Rebuild the batch each round. The original kept a fixed-size
		// slice and `continue`d past fetch errors, which left stale
		// messages from the previous round (or zero values) in the
		// batch and could re-process already-handled messages.
		batch = batch[:0]
		batchOffsets := ""
		for len(batch) < *concurrent {
			m, err := connReader.FetchMessage(ctx)
			if err != nil {
				log.Println("Failed to read messages:", err)
				// break (not continue): a persistent error would
				// otherwise spin this inner loop forever.
				break
			}
			batch = append(batch, m)
			batchOffsets += fmt.Sprintf("%d,", m.Offset)
		}
		if len(batch) > 0 {
			if succeededMsgs, err := consumeTask1(batch); err != nil {
				log.Println("Failed to deal message consuming:", err)
			} else {
				// Commits only apply in consumer-group mode; partition
				// mode tracks offsets externally.
				if *consumerGroup != "" {
					for _, m := range succeededMsgs {
						if err := connReader.CommitMessages(ctx, m); err != nil {
							log.Println("Failed to commit messages:", err)
						}
					}
				}
				totalConsumed += len(succeededMsgs)
				log.Printf("Consume topic: %s/%s, Group:%s, Completed: %s\n", *topic, partitionStr, *consumerGroup, batchOffsets)
			}
		}
		if *interval > 0 {
			time.Sleep(*interval)
		}
	}
}
// 消费消息
func consumeTask1(msgs []kafka.Message) (successMsgs []kafka.Message, err error) {
// TODO: 处理消息
successMsgs = msgs
return
}
// IfString is a ternary helper for strings: it yields trueVal when
// condition holds and falseVal otherwise.
func IfString(condition bool, trueVal, falseVal string) string {
	result := falseVal
	if condition {
		result = trueVal
	}
	return result
}
运行示例
例:向 user_hit_logs 写入10000条,30~120条每次, 每次延时300ms
go run ./producer.go -topic=user_hit_logs -n 10000 -c 30~120 -i=300ms
例:从 user_hit_logs 持续消费,消费组:hit_log_saver, 合并10条/每批处理, 延时间隔:100ms
go run ./consumer.go -topic=user_hit_logs -group=hit_log_saver -c=10 -i=100ms

387

被折叠的 条评论
为什么被折叠?



