import os
import re
import codecs
'''
Extract the URLs of GET requests from pcap files and write them to a txt file.
'''
class pcap():
    """Extract the URLs of HTTP GET requests from the raw bytes of a file.

    The input is not decoded as a pcap structure: its bytes are searched for
    the literal markers ``GET `` ... ``HTTP/`` (request target) followed by
    the next ``Host:`` ... CRLF (host name). Every match is printed and
    appended to the output text file as ``host + uri`` (no scheme), one URL
    per line.
    """

    def __init__(self, file, file2):
        """Load the capture into memory and open the output file.

        Args:
            file: Path of the capture file to scan; read fully in binary mode.
            file2: Path of the text file that receives the URLs. It is opened
                in ``a+`` mode, so it is created if missing and appended to
                otherwise.

        Raises:
            OSError: If either file cannot be opened.
        """
        self.file = file
        # Byte markers delimiting the request target and the Host header.
        self.uristart = b'GET '
        self.uriend = b'HTTP/'
        self.hoststart = b'Host:'
        self.hostend = b'\r\n'
        # The whole capture is held in memory, so the input handle can be
        # released immediately; ``self.f`` is kept (closed) as an attribute.
        with open(self.file, 'rb') as self.f:
            self.srcbytearr = self.f.read()
        # ``uristartindex`` is the offset the next search for ``GET `` starts
        # from. The remaining un-underscored counters are not used by the
        # scan; they are kept so existing attribute access keeps working.
        self.uristartindex = 0
        self.uriendindex = 0
        self.hoststartindex = 0
        self.hostendindex = 0
        # Offsets of the markers found for the request currently processed.
        self.uristart_index = 0
        self.uriend_index = 0
        self.hoststart_index = 0
        self.hostend_index = 0
        # URLs are decoded as UTF-8 below, so write them with the same codec
        # instead of the platform's locale encoding.
        self.f_output = open(file2, 'a+', encoding='utf-8')

    def http_geturl(self):
        """Scan the loaded bytes and append every ``host + uri`` found.

        The scan stops at the first request for which any of the four
        markers cannot be found. The output file is closed when the scan
        ends, including when an error interrupts it.

        Returns:
            None. Results are printed and written to the output file.
        """
        data = self.srcbytearr
        try:
            while True:
                self.uristart_index = data.find(self.uristart,
                                                self.uristartindex)
                if self.uristart_index == -1:
                    break
                self.uriend_index = data.find(self.uriend,
                                              self.uristart_index)
                if self.uriend_index == -1:
                    break
                # ``- 1`` drops the single separator byte that precedes
                # ``HTTP/`` in a request line.
                uri = data[self.uristart_index + len(self.uristart):
                           self.uriend_index - 1]

                self.hoststart_index = data.find(self.hoststart,
                                                 self.uriend_index)
                if self.hoststart_index == -1:
                    break
                self.hostend_index = data.find(self.hostend,
                                               self.hoststart_index)
                if self.hostend_index == -1:
                    break
                # Whitespace after ``Host:`` is optional, so strip it rather
                # than assuming exactly one space.
                host = data[self.hoststart_index + len(self.hoststart):
                            self.hostend_index].strip()

                # The slices come from a binary file; escape undecodable
                # bytes instead of aborting the whole scan.
                url = (host + uri).decode('utf-8', errors='backslashreplace')
                print(url)
                self.f_output.write(url + '\n')

                # Resume the search just past the ``Host`` token of this
                # request.
                self.uristartindex = self.hoststart_index + 4
        finally:
            self.f_output.close()
# Text file that collects the extracted URLs; pcap opens it in append mode.
f2 = 'result.txt'

# Extract the URLs from every file found below the capture directory.
for fpathe, dirs, fs in os.walk('C:/Users/s1/Desktop/dpi/http/'):  # path can be changed
    for fl in fs:
        s = pcap(os.path.join(fpathe, fl), f2)
        # Run the scan for this file; without this call the parser built
        # above would be discarded unused when ``s`` is rebound.
        s.http_geturl()

# NOTE(review): the original nesting of the next two statements is unclear;
# they are kept as a single extra run after the directory walk — confirm
# whether this hard-coded capture is still wanted.
s = pcap('C:/Users/s1/Desktop/http_10000.pcap', f2)
s.http_geturl()