提取GET报文的url

最新推荐文章于 2021-11-16 22:29:50 发布

游青

最新推荐文章于 2021-11-16 22:29:50 发布

阅读量359

点赞数

本文链接：https://blog.csdn.net/weixin_40676393/article/details/104739904

版权

import os
import re
import codecs

'''
Extract the URLs of GET requests from pcap files and write them to a txt file.
'''
class pcap():
    """Scan a capture file's raw bytes for HTTP GET requests and log their URLs.

    The file is treated as an opaque byte string: no pcap framing is parsed.
    Each ``GET <uri> HTTP/`` request line is paired with the next ``Host:``
    header that follows it, and ``host + uri`` is appended to the output file.
    """

    def __init__(self, file, file2):
        """Load ``file`` into memory and open ``file2`` for appending.

        Args:
            file: Path of the capture file to scan (read as binary).
            file2: Path of the text file that receives one URL per line.
        """
        self.file = file
        # Byte markers delimiting the request URI and the Host header value.
        self.uristart = b'GET '
        self.uriend = b'HTTP/'
        self.hoststart = b'Host:'
        self.hostend = b'\r\n'
        # Read everything up front and release the handle immediately;
        # ``self.f`` is kept (closed) only so the attribute still exists.
        with open(self.file, 'rb') as src:
            self.f = src
            self.srcbytearr = src.read()
        self.uristartindex = 0  # offset where the next 'GET ' search begins
        self.uriendindex = 0
        self.hoststartindex = 0
        self.hostendindex = 0
        self.uristart_index = 0
        self.uriend_index = 0
        self.hoststart_index = 0
        self.hostend_index = 0
        # Explicit UTF-8 so replacement characters produced while decoding
        # can always be written, whatever the platform default encoding is.
        self.f_output = open(file2, 'a+', encoding='utf-8')

    def http_geturl(self):
        """Write every ``host + uri`` found in the loaded bytes to the output.

        Scanning stops at the first request for which a ``GET ``, ``HTTP/``,
        ``Host:`` or terminating CRLF marker cannot be found. The output file
        is closed when the scan ends, so this method is meant to be called
        once per instance.

        Returns:
            list[str]: The URLs in the order they were found (also printed
            and appended to the output file, one per line).
        """
        data = self.srcbytearr
        urls = []
        try:
            while True:
                self.uristart_index = data.find(self.uristart, self.uristartindex)
                if self.uristart_index == -1:
                    break
                self.uriend_index = data.find(self.uriend, self.uristart_index)
                if self.uriend_index == -1:
                    break
                # Text between 'GET ' and 'HTTP/', minus the separating space.
                uri = data[self.uristart_index + len(self.uristart):self.uriend_index].strip()

                self.hoststart_index = data.find(self.hoststart, self.uriend_index)
                if self.hoststart_index == -1:
                    break
                self.hostend_index = data.find(self.hostend, self.hoststart_index)
                if self.hostend_index == -1:
                    break
                # strip() tolerates both 'Host: name' and 'Host:name'; a fixed
                # offset would cut the first character of the latter.
                host = data[self.hoststart_index + len(self.hoststart):self.hostend_index].strip()

                # Capture data is binary, so a match may contain bytes that are
                # not valid UTF-8; replace them instead of aborting the scan.
                url = (host + uri).decode('utf-8', errors='replace')
                print(url)
                self.f_output.write(url)
                self.f_output.write('\n')
                urls.append(url)
                # Resume after the Host line that was just consumed.
                self.uristartindex = self.hostend_index
        finally:
            # Close the output even if decoding or writing fails midway.
            self.f_output.close()
        return urls

# Driver: scan every file under the capture directory and append the URLs
# found in each one to a single result file.
f2 = 'result.txt'
for fpathe, dirs, fs in os.walk('C:/Users/s1/Desktop/dpi/http/'):  # path can be changed
    for fl in fs:
        s = pcap(os.path.join(fpathe, fl), f2)
        # NOTE(review): the next line appeared as live code in the extracted
        # text, where it would discard the per-file instance above; it looks
        # like a commented-out single-file alternative -- confirm.
        # s = pcap('C:/Users/s1/Desktop/http_10000.pcap', f2)
        s.http_geturl()

游青

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
提取GET报文的url

import osimport reimport codecs‘’’pcap文件提取GET报文的url,并将url放入到txt文件中‘’’class pcap():def init(self, file,file2):self.file = fileself.uristart=b’GET ’self.uriend=b’HTTP/’self.hoststart = b’Host...
复制链接

扫一扫