# 提取GET报文的url (extract the URLs of HTTP GET requests)

import os
import re
import codecs

'''
从pcap文件中提取GET报文的url,并将url写入txt文件中
'''
class pcap():
    """Extract the URLs of HTTP GET requests from a capture file.

    The file is not parsed as a pcap structure: its raw bytes are scanned
    for ``GET <uri> HTTP/...`` request lines and for the ``Host:`` header
    that belongs to the same header block.  Each URL is ``host + uri``.

    Attributes:
        file: Path of the capture file that was read.
        file2: Path of the text file the URLs are appended to.
        uristart: Byte marker that opens a request line (``b'GET '``).
        uriend: Byte marker of the protocol version (``b'HTTP/'``).
        hoststart: Byte marker of the Host header name (``b'Host:'``).
        hostend: Line terminator used by the headers (``b'\\r\\n'``).
        srcbytearr: Entire content of ``file`` as bytes.
    """

    def __init__(self, file, file2):
        """Read the capture file into memory and remember the output path.

        Args:
            file: Path of the capture file to scan.
            file2: Path of the text file that receives the URLs.

        Raises:
            OSError: If ``file`` cannot be opened or read.
        """
        self.file = file
        self.file2 = file2
        self.uristart = b'GET '
        self.uriend = b'HTTP/'
        self.hoststart = b'Host:'
        self.hostend = b'\r\n'
        # Read once and close immediately so no handle stays open per instance.
        with open(self.file, 'rb') as f:
            self.srcbytearr = f.read()

    def http_geturl(self):
        """Find every GET request with a Host header and record its URL.

        Each URL is printed and appended, one per line, to ``file2``.
        Requests without a protocol version on the request line or without
        a Host header in their own header block are skipped.  Bytes that
        are not valid UTF-8 are replaced instead of aborting the scan.

        Returns:
            list[str]: The URLs in the order they appear in the capture.

        Raises:
            OSError: If the output file cannot be opened or written.
        """
        data = self.srcbytearr
        eol = self.hostend
        # Anchoring on the preceding line break keeps headers such as
        # "X-Forwarded-Host:" from matching.
        host_marker = eol + self.hoststart
        urls = []
        pos = 0
        while True:
            start = data.find(self.uristart, pos)
            if start == -1:
                break
            line_end = data.find(eol, start + len(self.uristart))
            if line_end == -1:
                # No complete request line is left in the capture.
                break
            pos = line_end

            # The version marker is the LAST "HTTP/" on the request line, so
            # a path that itself contains "HTTP/" is not cut short.
            version_at = data.rfind(
                self.uriend, start + len(self.uristart), line_end)
            if version_at == -1:
                continue
            # A URI cannot contain a raw space, so the last "GET " before the
            # version marker is the real start even after stray binary data.
            start = data.rfind(self.uristart, start, version_at)
            uri = data[start + len(self.uristart):version_at].strip()

            # Only look for Host inside this request's own header block so a
            # request without Host never borrows the next request's header.
            headers_end = data.find(eol + eol, line_end)
            if headers_end == -1:
                headers_end = len(data)
            host_at = data.find(host_marker, line_end, headers_end)
            if host_at == -1:
                continue
            value_from = host_at + len(host_marker)
            host_end = data.find(eol, value_from)
            if host_end == -1:
                continue
            # strip() copes with zero, one or several spaces after "Host:".
            host = data[value_from:host_end].strip()

            url = (host + uri).decode('utf-8', errors='replace')
            print(url)
            urls.append(url)

        # Open the output only for the duration of the write so the handle is
        # always closed and the method can be called more than once.
        with open(self.file2, 'a', encoding='utf-8') as out:
            out.writelines(url + '\n' for url in urls)
        return urls

# Every URL that is found is appended to this file (relative to the
# current working directory).
f2 = 'result.txt'
# Scan every file below the capture directory, one pcap object per file.
for fpathe, dirs, fs in os.walk('C:/Users/s1/Desktop/dpi/http/'):  # path can be changed
    for fl in fs:
        s = pcap(os.path.join(fpathe, fl), f2)
        # Single-file alternative:
        # s = pcap('C:/Users/s1/Desktop/http_10000.pcap', f2)
        s.http_geturl()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值