# 3-node ZooKeeper ensemble + 3-broker Kafka cluster for local testing.
# FIX: the original had lost all indentation (every key at column 0), which is
# not a valid compose file; structure restored with 2-space indentation.
# Number-like environment values are quoted so the YAML parser cannot retype them.
version: '3.0'
services:
  zoo1:
    image: zookeeper:3.4.12
    container_name: zoo1
    ports:
      - "2181:2181"
    volumes:
      - "/Users/docker/data/zookeeper/zookeeper1/data:/data"
      - "/Users/docker/data/zookeeper/zookeeper1/datalog:/datalog"
    environment:
      # NOTE(review): this node uses server id 4 (not 1); the id is used
      # consistently as server.4=zoo1 in all three ZOO_SERVERS lists, so it
      # works — confirm it is intentional (see ZOOKEEPER-3828 link below).
      ZOO_MY_ID: "4"
      ZOO_SERVERS: server.4=0.0.0.0:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
  zoo2:
    image: zookeeper:3.4.12
    container_name: zoo2
    ports:
      - "2182:2181"
    volumes:
      - "/Users/docker/data/zookeeper/zookeeper2/data:/data"
      - "/Users/docker/data/zookeeper/zookeeper2/datalog:/datalog"
    environment:
      ZOO_MY_ID: "2"
      ZOO_SERVERS: server.4=zoo1:2888:3888 server.2=0.0.0.0:2888:3888 server.3=zoo3:2888:3888
  zoo3:
    image: zookeeper:3.4.12
    container_name: zoo3
    ports:
      - "2183:2181"
    volumes:
      - "/Users/docker/data/zookeeper/zookeeper3/data:/data"
      - "/Users/docker/data/zookeeper/zookeeper3/datalog:/datalog"
    environment:
      ZOO_MY_ID: "3"
      ZOO_SERVERS: server.4=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=0.0.0.0:2888:3888
  broker1:
    # NOTE(review): unpinned tag — consider pinning a version for reproducibility.
    image: wurstmeister/kafka
    container_name: broker1
    ports:
      - "9091:9092"
    depends_on:
      - zoo1
      - zoo2
      - zoo3
    environment:
      KAFKA_BROKER_ID: "1"
      KAFKA_ADVERTISED_HOST_NAME: broker1
      KAFKA_ADVERTISED_PORT: "9092"
      KAFKA_HOST_NAME: broker1
      KAFKA_ZOOKEEPER_CONNECT: zoo1:2181,zoo2:2181,zoo3:2181
      KAFKA_LISTENERS: PLAINTEXT://broker1:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker1:9092
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - "/Users/docker/data/kafka/kafka1/:/kafka"
  broker2:
    image: wurstmeister/kafka
    container_name: broker2
    ports:
      - "9092:9092"
    depends_on:
      - zoo1
      - zoo2
      - zoo3
    environment:
      KAFKA_BROKER_ID: "2"
      KAFKA_ADVERTISED_HOST_NAME: broker2
      KAFKA_ADVERTISED_PORT: "9092"
      KAFKA_HOST_NAME: broker2
      KAFKA_ZOOKEEPER_CONNECT: zoo1:2181,zoo2:2181,zoo3:2181
      KAFKA_LISTENERS: PLAINTEXT://broker2:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker2:9092
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - "/Users/docker/data/kafka/kafka2/:/kafka"
  broker3:
    image: wurstmeister/kafka
    container_name: broker3
    ports:
      - "9093:9092"
    depends_on:
      - zoo1
      - zoo2
      - zoo3
    environment:
      KAFKA_BROKER_ID: "3"
      KAFKA_ADVERTISED_HOST_NAME: broker3
      KAFKA_ADVERTISED_PORT: "9092"
      KAFKA_HOST_NAME: broker3
      KAFKA_ZOOKEEPER_CONNECT: zoo1:2181,zoo2:2181,zoo3:2181
      KAFKA_LISTENERS: PLAINTEXT://broker3:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker3:9092
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - "/Users/docker/data/kafka/kafka3/:/kafka"
# https://segmentfault.com/a/1190000006907443
# https://issues.apache.org/jira/browse/ZOOKEEPER-3828
测试zk连接
docker run -it --rm \
--link zoo1:zk1 \
--link zoo2:zk2 \
--link zoo3:zk3 \
--net docker-compose_default --name zk-cluster-test \
zookeeper zkCli.sh -server zk1:2181,zk2:2181,zk3:2181
通过日志查看是否启动成功。
查询 lag 值(消息积压量;下面代码中的 get_lan 即 get_lag 的误拼)
import time
from time import sleep
from kafka import KafkaProducer, KafkaConsumer, TopicPartition
# Compute total consumer lag ("Lan" in the original comment is a typo for "lag").
def get_lan(top, producer, consumer):
    """Return the total consumer lag for topic ``top``.

    Lag per partition = end offset - last committed offset of the consumer's
    group.  BUG FIX: ``consumer.committed(p)`` returns ``None`` when the group
    has never committed on that partition; the original subtracted it directly
    and raised ``TypeError``.  We now fall back to the partition's beginning
    offset, so a never-consumed topic reports its full backlog.

    :param top: topic name
    :param producer: KafkaProducer, used only to list the topic's partitions
    :param consumer: KafkaConsumer whose group's committed offsets are read
    :return: total lag summed over all partitions (int)
    """
    partitions = producer.partitions_for(top)
    total = 0  # renamed from `sum` to avoid shadowing the builtin
    for pt in partitions:
        p = TopicPartition(topic=top, partition=pt)
        committed = consumer.committed(p)  # None if the group never committed
        end_offsets = consumer.end_offsets([p])
        if committed is None:
            committed = consumer.beginning_offsets([p])[p]
        print(committed, end_offsets)
        total += end_offsets[p] - committed
    return total
if __name__ == '__main__':
    # split_str()
    # consume_info()
    top = 'biturd_urgent'
    top2 = 'biturd_great'
    # group_id applies to consumers: members of one group share one position.
    producer = KafkaProducer(
        bootstrap_servers=['ip:9092'])
    # BUG FIX: the original line ended with a stray "]" after the closing
    # parenthesis ("'ip:9092'])]"), which is a SyntaxError.
    consumer = KafkaConsumer(top, group_id='biturd_get2',
                             bootstrap_servers=[
                                 'ip:9092'])
    # Report the backlog of both topics every 10 seconds, forever.
    while True:
        a = get_lan(top, producer, consumer)
        a2 = get_lan(top2, producer, consumer)
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print(top + "当前积压: " + str(a) + "\t当前时间:" + now)
        print(top2 + "当前积压: " + str(a2) + "\t当前时间:" + now)
        print("========================")
        sleep(10)
基本操作
import time
from time import sleep
from kafka import KafkaProducer, KafkaConsumer, TopicPartition
from kafka.errors import kafka_errors
import traceback
import json
def producer_demo():
    """Send three JSON-serialized key/value messages to topic 'kafka_demo'."""
    # Messages here are key/value pairs (a key is optional); both key and
    # value are serialized as JSON before sending.
    producer = KafkaProducer(
        bootstrap_servers=['localhost:9092'],
        key_serializer=lambda k: json.dumps(k).encode(),
        value_serializer=lambda v: json.dumps(v).encode())
    # Send three messages.
    for i in range(0, 3):
        future = producer.send(
            'kafka_demo',
            key='count_num',  # messages with the same key go to the same partition
            value=str(i),
            partition=1)  # send to partition 1
        print("send {}".format(str(i)))
        try:
            future.get(timeout=10)  # block until the send succeeds or times out
        # NOTE(review): kafka.errors canonically exports KafkaError; confirm
        # the `kafka_errors` name imported at the top of the file resolves.
        except kafka_errors:
            # BUG FIX: format_exc() only *returns* the traceback string; the
            # original discarded it, so send failures were completely silent.
            print(traceback.format_exc())
def consumer_demo():
    """Consume topic 'kafka_demo' forever, printing each JSON-decoded record."""
    kc = KafkaConsumer(
        'kafka_demo',
        bootstrap_servers=':9092',
        group_id='test'
    )
    # Blocks forever; each message's key and value are JSON-decoded bytes.
    for msg in kc:
        decoded_key = json.loads(msg.key.decode())
        decoded_value = json.loads(msg.value.decode())
        print("receive, key: {}, value: {}".format(decoded_key, decoded_value))
def queryMsg(topic=None):
    """Print total offsets, committed offsets, and the remaining lag.

    BUG FIX: ``partitions`` was the placeholder string ``""`` (the real line
    was commented out), so the partition loop did nothing and
    ``end_offsets("")`` is not a valid argument.  The topic is now a
    backward-compatible optional parameter.  Also, the original passed
    "ip:9092" positionally to KafkaConsumer, where it is interpreted as a
    *topic* name rather than a server address.

    :param topic: topic to inspect; with None the partition list is empty
                  and all sums are 0 (matches the old no-op intent)
    """
    consumer = KafkaConsumer(bootstrap_servers="ip:9092")
    if topic is None:
        partitions = []
    else:
        partitions = [TopicPartition(topic, p)
                      for p in consumer.partitions_for_topic(topic)]
    print("start to cal offset:")
    # total (end-of-log) offset per partition
    toff = consumer.end_offsets(partitions)
    toff = [(key.partition, toff[key]) for key in toff.keys()]
    toff.sort()
    print("total offset: {}".format(str(toff)))
    # current committed offset per partition (None if the group never committed)
    coff = [(x.partition, consumer.committed(x)) for x in partitions]
    coff.sort()
    print("current offset: {}".format(str(coff)))
    # cal sum and left (lag = total - committed)
    toff_sum = sum([x[1] for x in toff])
    cur_sum = sum([x[1] for x in coff if x[1] is not None])
    left_sum = toff_sum - cur_sum
    print("kafka left: {}".format(left_sum))
def consume_info():
    """Print topic/partition/offset metadata for a consumer, then consume forever."""
    # auto_offset_reset: 'earliest' rewinds to the oldest available message,
    # 'latest' (the default) starts from the newest.
    consumer = KafkaConsumer('biturd_great', group_id='biturd_get2', auto_offset_reset='earliest',
                             bootstrap_servers=[
                                 'ip:9092'])
    print(consumer.partitions_for_topic("biturd_joiner"))  # partition info for topic biturd_joiner
    print(consumer.topics())  # list of all topics
    print(consumer.subscription())  # topics this consumer subscribes to
    print(consumer.assignment())  # this consumer's assigned topic-partitions
    print(consumer.beginning_offsets(consumer.assignment()))  # earliest consumable offsets
    # BUG FIX: the original called get_lan() with no arguments, but get_lan
    # requires (top, producer, consumer) -> TypeError at runtime; call removed.
    # consumer.seek(TopicPartition(topic=u'test', partition=0), 5)  # rewind to offset 5
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))
def split_str():
    """Quote-wrap a comma-separated host string ("a,b" -> 'a','b') and print it."""
    hosts = "ip:9092的字符串"
    quoted = hosts.replace(',', "','")
    print("'" + quoted + "'")
# Compute total consumer lag ("Lan" in the original comment is a typo for "lag").
def get_lan(top, producer, consumer):
    """Return the total consumer lag for topic ``top``.

    Lag per partition = end offset - last committed offset of the consumer's
    group.  BUG FIX: ``consumer.committed(p)`` returns ``None`` for a group
    that never committed on that partition; the original subtracted it
    directly and raised ``TypeError``.  We fall back to the partition's
    beginning offset so a never-consumed topic reports its full backlog.

    :param top: topic name
    :param producer: KafkaProducer, used only to list the topic's partitions
    :param consumer: KafkaConsumer whose group's committed offsets are read
    :return: total lag summed over all partitions (int)
    """
    partitions = producer.partitions_for(top)
    total = 0  # renamed from `sum` to avoid shadowing the builtin
    for pt in partitions:
        p = TopicPartition(topic=top, partition=pt)
        committed = consumer.committed(p)  # None if the group never committed
        end_offsets = consumer.end_offsets([p])
        if committed is None:
            committed = consumer.beginning_offsets([p])[p]
        total += end_offsets[p] - committed
    return total
if __name__ == '__main__':
    # split_str()
    # consume_info()
    top = 'ip_urgent'
    top2 = 'ip_great'
    producer = KafkaProducer(
        bootstrap_servers=['ip:9092'])
    consumer = KafkaConsumer(top, group_id='biturd_get2', auto_offset_reset='earliest',
                             bootstrap_servers=[
                                 'ip:9092'])
    # Report the backlog of both topics every 10 seconds, forever.
    while True:
        lags = [(t, get_lan(t, producer, consumer)) for t in (top, top2)]
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        for t, lag in lags:
            print(t + "当前积压: " + str(lag) + "\t当前时间:" + now)
        print("========================")
        sleep(10)