利用生产者消费者模型及DPKT处理pcap文件

本文链接：https://blog.csdn.net/lepton126/article/details/124550335

1、使用find命令生成pcap文件列表文件

2、使用 dpkt 提取 pcap 文件中的源地址、源端口、目的地址、目的端口等信息，并打印程序消耗的时间

find 路径 -type f -name '*.pcap' > pcapfilelist.txt

#!/bin/env python3
import time
from multiprocessing import Process,JoinableQueue,cpu_count
import dpkt
import datetime
from dpkt.utils import mac_to_str,inet_to_str
def print_packets(pcap):
    """Print a summary of every packet in a capture, then pretty-print the last frame.

    For each packet this prints the timestamp (rendered as a UTC datetime)
    and the Ethernet source MAC, destination MAC and type field. Frames
    whose payload is not an IP packet are reported and skipped; for IP
    packets the addresses, length, TTL and fragmentation fields are printed.

    Args:
        pcap: An iterable of ``(timestamp, buf)`` pairs, e.g. a
            ``dpkt.pcap.Reader``.
    """
    # Last decoded frame; stays None when the capture holds no packets so the
    # pretty-print demo below does not touch an unbound name.
    eth = None
    for timestamp, buf in pcap:
        print('Timestamp: ', str(datetime.datetime.utcfromtimestamp(timestamp)))
        eth = dpkt.ethernet.Ethernet(buf)
        print('Ethernet Frame: ', mac_to_str(eth.src), mac_to_str(eth.dst), eth.type)
        if not isinstance(eth.data, dpkt.ip.IP):
            print('Non IP Packet type not supported %s\n' % eth.data.__class__.__name__)
            continue
        ip = eth.data
        print('IP: %s -> %s (len=%d ttl=%d DF=%d MF=%d offset=%d)\n' % (
            inet_to_str(ip.src), inet_to_str(ip.dst),
            ip.len, ip.ttl, ip.df, ip.mf, ip.offset))

    if eth is None:
        # Empty capture: there is no frame to pretty-print.
        return
    print('**Pretty print demo **\n')
    eth.pprint()
def analyze_pcap(pcapfile, result_f):
    """Process one pcap file and record its name in the progress log.

    Args:
        pcapfile: Path of the pcap file; surrounding whitespace (such as the
            trailing newline of a list-file line) is stripped first.
        result_f: Writable text file object used as the log of processed
            files. One file name is written per line.

    A name that is empty after stripping is ignored. Errors raised while
    opening or parsing the file propagate to the caller, and in that case
    nothing is written to ``result_f``.
    """
    pcapfile = pcapfile.strip()
    if not pcapfile:
        # Blank entry (e.g. an empty line in the list file): nothing to open.
        return

    with open(pcapfile, 'rb') as pcapfilef:
        pcap = dpkt.pcap.Reader(pcapfilef)  # let dpkt parse the capture
        # Worker function; this one only prints packet details and can be
        # swapped for any other function that consumes the reader.
        print_packets(pcap)

    # Log the finished file on its own line so entries stay separable.
    result_f.write(pcapfile + '\n')
    # Flush right away so the entry does not sit in this process's buffer.
    result_f.flush()
def consumer(queue, result_f):
    """Consumer loop: take file names off the queue and analyze them forever.

    Args:
        queue: Queue providing ``get()`` and ``task_done()``; each item is a
            pcap file name.
        result_f: Log file object passed through to ``analyze_pcap``.

    The loop never returns. Every item taken from the queue is acknowledged
    with ``task_done()`` even when processing fails, so a caller blocked in
    ``queue.join()`` is not left waiting on a file that raised.
    """
    import sys

    while True:
        pcapfile = queue.get()
        try:
            analyze_pcap(pcapfile, result_f)
        except Exception as exc:
            # Worker boundary: report the failure and keep serving the queue
            # instead of letting one bad file terminate this worker.
            print('Failed to process %r: %r' % (pcapfile, exc), file=sys.stderr)
        finally:
            queue.task_done()
def producer(queue, listfile='pcapfilelist.txt'):
    """Producer: read file names from a list file and put them on the queue.

    Args:
        queue: Queue providing ``put()``; receives one item per file name.
        listfile: Path of the text file holding one pcap file name per line.
            Defaults to ``'pcapfilelist.txt'``.

    Each line is stripped of surrounding whitespace before being enqueued,
    and lines that are empty after stripping are skipped.
    """
    with open(listfile, 'r') as f:
        for line in f:
            pcapfilename = line.strip()
            if pcapfilename:
                queue.put(pcapfilename)

def main():
    """Run the producer/consumer pipeline over the listed pcap files.

    Starts one producer process and ``cpu_count()`` daemon consumer
    processes sharing a bounded queue, then waits until the producer has
    finished and every queued item has been acknowledged. Names of processed
    files are logged to ``processed_pcapfile.txt``, which is truncated on
    start.

    Raises:
        ValueError: If the ``fork`` start method is unavailable on this
            platform.
    """
    import multiprocessing

    # The open log file is handed to the workers as a Process argument. File
    # objects cannot be pickled, so the workers must inherit it through fork
    # rather than receive it in a freshly started interpreter; request the
    # fork context explicitly instead of relying on the platform default.
    ctx = multiprocessing.get_context('fork')

    with open('processed_pcapfile.txt', 'w+') as result_f:
        queue = ctx.JoinableQueue(40)  # bound the number of pending names
        pc = ctx.Process(target=producer, args=(queue,))
        pc.start()

        for _ in range(cpu_count()):
            worker = ctx.Process(target=consumer, args=(queue, result_f))
            # Consumers loop forever; daemon=True lets the program exit once
            # the parent is done waiting.
            worker.daemon = True
            worker.start()

        pc.join()      # every name has been enqueued
        queue.join()   # every enqueued name has been acknowledged

if __name__ == '__main__':
    # Time the whole run and report the elapsed seconds.
    started_at = time.time()
    main()
    elapsed = time.time() - started_at
    print("Time:", elapsed)