# kafka日志处理类 — Kafka log-handling classes (producer-side logging handler
# plus a consumer script that reads the log records back).
import configparser
import json
import logging
import socket
from datetime import datetime

from kafka import KafkaConsumer, KafkaProducer
# --- Configuration --------------------------------------------------------
# Path of the INI-style config file holding the [kafka] section.
conf_file = 'my.conf'
# BUG FIX: SafeConfigParser was deprecated in Python 3.2 and removed in
# Python 3.12; ConfigParser is the drop-in replacement.  strict=False keeps
# the original tolerance for duplicate sections/options in the file.
cp = configparser.ConfigParser(strict=False)
cp.read(conf_file, encoding='utf-8')
#获取当前主机IP地址
def get_local_ip():
    """Return this machine's IPv4 address, resolved from its own hostname."""
    return socket.gethostbyname(socket.gethostname())
#从配置文件中获取kafka的hosts
def get_kafka_hosts():
    """Return the Kafka bootstrap servers listed in the config file.

    Reads the comma-separated ``hosts`` option from the ``[kafka]`` section
    of the module-level parser ``cp``.

    Returns:
        list[str]: one entry per host, whitespace-stripped, empties dropped.

    BUG FIX: entries are now stripped, so ``'a:9092, b:9092'`` no longer
    yields ``' b:9092'`` (a leading space breaks the bootstrap address).
    The original ``x is not None`` filter was dead code — ``str.split``
    never yields None.
    """
    hosts = cp.get('kafka', 'hosts')
    return [h.strip() for h in hosts.split(',') if h.strip()]
class KafkaLoggingHandler(logging.Handler):
    """logging.Handler that publishes each log record to a Kafka topic as JSON."""

    def __init__(self):
        logging.Handler.__init__(self)
        # Topic every record is published to.
        self.config_topic = 'mytopic'
        # IP of the producing host, stamped onto every record.
        self.hostname = get_local_ip()
        self.hosts = get_kafka_hosts()
        # acks=0 is fire-and-forget (log delivery is best-effort);
        # max_block_ms=1000 bounds how long send() may block when broker
        # metadata or buffer space is unavailable.
        self.producer = KafkaProducer(bootstrap_servers=self.hosts,
                                      compression_type='gzip',
                                      max_block_ms=1000,
                                      acks=0)

    def emit(self, record):
        """Serialize *record* to JSON and send it to Kafka.

        Adds ``message`` (the formatted text), ``TimeStamp`` and ``HOSTNAME``
        to the record's attribute dict before serializing.
        """
        logging_dict = getattr(record, '__dict__')
        logging_dict['message'] = self.format(record)
        logging_dict['TimeStamp'] = str(datetime.now().replace(microsecond=0))
        logging_dict['HOSTNAME'] = self.hostname
        # default=str: record dicts can contain non-JSON-serializable values
        # (exc_info tuples, args, arbitrary objects); stringify them instead
        # of raising from inside the logging call.
        msg = json.dumps(logging_dict, default=str)
        # Key and value must be bytes for kafka-python, hence the encodes.
        try:
            print('send kafka msg:', msg)
            self.producer.send(self.config_topic,
                               key='py-kf-logging'.encode('utf-8'),
                               value=msg.encode('utf-8'))
            self.producer.flush()
        except Exception as e:
            # BUG FIX: the original called logging.error('send error:', e),
            # passing e as a %-format argument with no placeholder, so the
            # exception text was never rendered.
            logging.error('send error: %s', e)
class KafkaLoggingUtils:
    """Factory for loggers that forward their records to Kafka."""

    # Root-logger defaults applied on first use via basicConfig (which is a
    # no-op if the root logger is already configured).
    level = logging.INFO
    format = '%(asctime)s %(name)s[line:%(lineno)d] %(levelname)s %(message)s'
    datefmt = '%a,%d %b %Y %H:%M:%S'
    name = __name__

    @staticmethod
    def getLogger(name):
        """Return logger *name* with a KafkaLoggingHandler attached.

        BUG FIX: the original attached a brand-new KafkaLoggingHandler on
        every call, so repeated getLogger() calls for the same name made
        each message reach Kafka multiple times (and opened extra
        producers).  A handler is now attached only if none is present.
        """
        logging.basicConfig(level=KafkaLoggingUtils.level,
                            format=KafkaLoggingUtils.format,
                            datefmt=KafkaLoggingUtils.datefmt)
        logger = logging.getLogger(name)
        if not any(isinstance(h, KafkaLoggingHandler) for h in logger.handlers):
            logger.addHandler(KafkaLoggingHandler())
        return logger
# ---------------------------------------------------------------------------
# Usage (使用)
#
# Producer side (生产端):
#     logging = KafkaLoggingUtils.getLogger(project_name)
#     logging.info("%s 任务开始" % project_name)
#
# Consumer side (消费端) — the script below reads the records back:
# Consumer-side configuration: topic to read from (must match the handler's
# config_topic) and the Kafka bootstrap servers from the config file.
source_topic = "mytopic"
bootstrap_servers = get_kafka_hosts()
def main():
    """Consume JSON log records from Kafka forever and print a summary line.

    On any unexpected error the iteration restarts with a fresh consumer
    (and a fresh DB connection via conn_crawl_db — defined elsewhere in the
    project; presumably returns (connection, cursor), TODO confirm).
    """
    while True:
        consumer = None
        try:
            consumer = KafkaConsumer(source_topic,
                                     bootstrap_servers=bootstrap_servers)
            conn, cursor = conn_crawl_db()
            for msg in consumer:
                # msg.value is raw bytes; records are UTF-8 JSON documents.
                message = msg.value.decode('utf-8')
                try:
                    data_dict = json.loads(message)
                    # A record may parse to a non-dict (e.g. a bare string);
                    # in that case all fields stay empty, matching the
                    # original behavior of printing a mostly-blank line.
                    if isinstance(data_dict, dict):
                        levelname = data_dict.get('levelname', '')
                        proname = data_dict.get('name', '')
                        hostip = data_dict.get('HOSTNAME', '')
                        logtime = data_dict.get('TimeStamp', '')
                        remsg = data_dict.get('msg', '')
                    else:
                        levelname = proname = hostip = logtime = remsg = ''
                    print(proname, remsg, hostip, levelname, logtime,
                          str(datetime.now().replace(microsecond=0)))
                except Exception as e:
                    # Malformed record: show the payload and the error,
                    # then keep consuming.
                    print(message)
                    print(e)
        except Exception as e:
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(now, e)
            # BUG FIX: release the broker connection before the retry loop
            # opens a new consumer (the original leaked it — close() was
            # commented out).
            if consumer is not None:
                consumer.close()
# Script entry point: run the Kafka consumer loop when executed directly.
if __name__ == '__main__':
    main()