今天写了一个爬取补天厂商列表的爬虫,方便之后进行渗透测试。
直接贴出代码:
import requests
from lxml import etree
import os
#引入模块 以dom-tree的方式浏览网页,注意模块要pip install安装才能引入
def Save_File(messageList):
    """Append vendor records to ``补天厂商列表/厂商列表.txt``.

    The output directory is created (relative to the current working
    directory) when it does not exist yet. The file is opened in append
    mode, so repeated calls accumulate lines instead of overwriting.

    Args:
        messageList: Iterable of records. For each record ``x`` the line
            written is ``"<x[0][0]> <x[1][0]>\\n"``, i.e. the first element
            of the record's first and second sequences (vendor name and
            vendor URL according to the original comment).

    Raises:
        IndexError: If either sequence of a record is empty.
        OSError: If the directory or file cannot be created or written.
    """
    path = '补天厂商列表'
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(path, exist_ok=True)
    filepath = '厂商列表.txt'
    new_path = os.path.join(path, filepath)
    # Append mode: each call adds to the vendor list already on disk.
    with open(new_path, 'a+', encoding='utf8') as f:
        for x in messageList:
            f.write('%s %s\n' % (x[0][0], x[1][0]))
def load_message(page_message):
dom = etree.HTML(page_message)
i = 2
LM_messageList = []
while True:
index = str(i)
new_xpath = '//table/tr[' + index + ']/td[1]/a/text()'#使用正则匹配厂商名字
title = dom.xpath(new_xpath)
new_url_xpath = '//table/tr[' + index + ']/td[2]/text()'#使用正则匹配厂商URL
url = dom.xpath(new_url_xpath)