import json
import time
import argparse
import threading
from concurrent.futures import ThreadPoolExecutor
from kafka import KafkaProducer
import numpy as np
class KafkaStressTest:
    """Kafka producer stress-test harness.

    Sends batches of JSON-encoded test messages to a topic from multiple
    worker threads and aggregates throughput / latency / error statistics.
    """

    def __init__(self, config):
        # config: dict of connection and producer tuning parameters
        # (bootstrap_servers, security_protocol, batch_size, linger_ms,
        #  acks, retries, request_timeout, test_id, ...).
        self.config = config
        self.producer = self._init_producer()
        self.lock = threading.Lock()  # guards self.stats across worker threads
        self.stats = {
            'total_sent': 0,
            'start_time': None,
            'end_time': None,
            'throughput': 0,
            'avg_latency': 0,
            'errors': 0
        }

    def _init_producer(self):
        """Create the KafkaProducer from self.config (SASL/SSL aware)."""
        return KafkaProducer(
            bootstrap_servers=self.config['bootstrap_servers'],
            security_protocol=self.config['security_protocol'],
            sasl_mechanism=self.config.get('sasl_mechanism'),
            sasl_plain_username=self.config.get('sasl_username'),
            sasl_plain_password=self.config.get('sasl_password'),
            ssl_cafile=self.config.get('ssl_cafile'),
            value_serializer=lambda v: json.dumps(v).encode('utf-8'),
            batch_size=self.config['batch_size'],
            linger_ms=self.config['linger_ms'],
            acks=self.config['acks'],
            retries=self.config['retries'],
            compression_type=self.config.get('compression_type')
        )

    def _generate_message(self):
        """Build one test message: fixed-size payload plus tracing metadata."""
        msg_size = self.config.get('message_size', 1024)
        return {
            'timestamp': int(time.time() * 1000),
            'payload': 'A' * msg_size,
            'meta': {
                'test_id': self.config['test_id'],
                'thread_id': threading.get_ident()
            }
        }

    def _send_batch(self, topic, batch_size):
        """Send `batch_size` individual records and update shared stats.

        BUG FIX: the original passed the whole message list as ONE record
        value (the value_serializer JSON-encoded the list), so only a single
        Kafka record was produced per "batch" while stats counted batch_size
        messages. Each message is now produced as its own record and the
        batch completes only when every record is acked.
        """
        start = time.time()
        try:
            # Fire off all sends first so the producer can batch them,
            # then wait for broker acks on every record.
            futures = [
                self.producer.send(topic, self._generate_message())
                for _ in range(batch_size)
            ]
            for future in futures:
                future.get(timeout=self.config['request_timeout'])
            latency = (time.time() - start) * 1000  # batch round-trip, in ms
            with self.lock:
                prev_sent = self.stats['total_sent']
                self.stats['total_sent'] = prev_sent + batch_size
                # Running mean of batch latency, weighted per message.
                # (The original divided a single latency sample by the total
                # message count, which systematically diluted the average.)
                self.stats['avg_latency'] = (
                    self.stats['avg_latency'] * prev_sent + latency * batch_size
                ) / self.stats['total_sent']
        except Exception as e:
            with self.lock:
                self.stats['errors'] += 1
            print(f"发送失败: {str(e)}")

    def run(self, topic, duration, batch_size=100, threads=8):
        """Run the stress test for `duration` seconds (10 batches scheduled/sec)."""
        self.stats['start_time'] = time.time()
        print(f"开始压测: {self.config['test_id']} | 持续时间: {duration}秒")
        with ThreadPoolExecutor(max_workers=threads) as executor:
            futures = []
            for _ in range(duration * 10):  # schedule 10 batches per second
                futures.append(executor.submit(self._send_batch, topic, batch_size))
                time.sleep(0.1)
            # Surface any exception raised inside a worker.
            for future in futures:
                future.result()
        # Drain the producer's internal buffer before stamping end_time, so
        # throughput reflects records actually delivered, not just queued.
        self.producer.flush()
        self.stats['end_time'] = time.time()
        self._generate_report(topic)

    def _generate_report(self, topic):
        """Print the final throughput / latency / error summary."""
        total_time = self.stats['end_time'] - self.stats['start_time']
        total_sent = self.stats['total_sent']
        # Guard divisions: total_sent is 0 when every batch failed, and
        # total_time can be 0 in degenerate runs.
        self.stats['throughput'] = total_sent / total_time if total_time > 0 else 0
        error_rate = self.stats['errors'] / total_sent * 100 if total_sent else 0.0
        print("\n=== 压测报告 ===")
        print(f"测试主题: {topic}")
        print(f"总耗时: {total_time:.2f}秒")
        print(f"总消息数: {total_sent:,}")
        print(f"吞吐量: {self.stats['throughput']:,.0f} msg/s")
        print(f"平均延迟: {self.stats['avg_latency']:.2f} ms")
        print(f"错误率: {error_rate:.2f}%")
        print("=== 测试结束 ===")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Kafka 压测工具')
    parser.add_argument('--topic', type=str, required=True, help='测试主题')
    parser.add_argument('--duration', type=int, default=60, help='测试时长(秒)')
    parser.add_argument('--threads', type=int, default=8, help='线程数')
    parser.add_argument('--batch-size', type=int, default=100, help='每批消息数')
    args = parser.parse_args()

    # Producer / test configuration. NOTE(review): credentials are hard-coded
    # here — consider sourcing them from the environment or a secrets store.
    config = {
        'bootstrap_servers': 'b-1.msk-cluster.amazonaws.com:9094',
        'security_protocol': 'SASL_SSL',
        'sasl_mechanism': 'SCRAM-SHA-256',
        'sasl_username': 'producer',
        'sasl_password': 'secret',
        'ssl_cafile': '/opt/kafka/certs/ca-cert.pem',
        'batch_size': args.batch_size * 1024,  # producer buffer size, in bytes
        # BUG FIX: key was 'linger.ms', but _init_producer reads
        # config['linger_ms'] — the old key raised KeyError at startup.
        'linger_ms': 200,
        'acks': 'all',
        'retries': 5,
        'compression_type': 'lz4',
        'request_timeout': 30,  # seconds, used for per-record future.get()
        'test_id': f"kafka-stress-test-{int(time.time())}"
    }

    # Launch the stress test.
    test = KafkaStressTest(config)
    test.run(args.topic, args.duration, args.batch_size, args.threads)