# A small Python script that batch-scans websites for leftover backup files
# using multiprocessing. Put the target URLs in a file (one per line); the
# script reads them and probes each site for common backup-archive names.
# With a process pool the scan speed is quite good.
import requests
import re
import multiprocessing
# Candidate backup-archive filenames probed on every site (includes the
# common Chinese default folder name "新建文件夹" = "New Folder").
# NOTE(review): the name `list` shadows the builtin `list`; `main()` below
# reads this global, so renaming requires changing both places together.
list = ['wwwroot.rar','wwwroot.zip','新建文件夹.rar','新建文件夹.zip','www.rar','www.zip','web.rar','web.zip']
# Browser-like User-Agent so the probes look like normal traffic.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0'}
# Content-Type values that indicate a downloadable archive (i.e. a hit).
content_type = ['application/x-rar','application/x-gzip','application/zip','application/octet-stream','application/x-7z-compressed']
def main(i):
    """Probe one target URL (one raw line from the input file) for backup files.

    For every candidate filename in the module-level ``list`` a HEAD request
    is sent; when the response Content-Type matches a known archive type the
    hit is appended to the results file, otherwise a miss is printed.

    Parameters:
        i: raw line from the URL list file (trailing newline included).
    """
    raw = i.strip()
    # Extract the scheme+host part so trailing paths or stray text are
    # dropped. Percent-escapes are two hex digits (%XX) — the original
    # pattern's {6} could never match a real escape.
    matches = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', raw)
    if not matches:
        # Line held no recognizable URL; nothing to scan.
        print('无法连接')
        return
    # A line is expected to hold one URL; take the first match (the original
    # comma-joined all matches, which would build an invalid URL).
    base = matches[0]
    for filename in list:  # module-level list of candidate backup names
        target = base + '/' + filename
        try:
            # timeout so a hung host cannot stall this worker forever;
            # only the request itself sits inside the try block.
            r = requests.head(url=target, headers=headers, timeout=10)
        except requests.RequestException:
            print('无法连接')
            continue
        # .get() avoids a KeyError when the server omits Content-Type.
        if r.headers.get('Content-Type') in content_type:
            with open(r'存放存在的网站的路径', 'a', encoding='utf-8') as f:
                f.write('存在备份文件' + ':' + target + '\n')
        else:
            print('不存在' + target)
if __name__ == '__main__':
    # 50 worker processes (multiprocessing.Pool spawns processes, not
    # threads — the original comment said "thread count").
    p = multiprocessing.Pool(50)
    with open(r'读取url路径', 'r', encoding='utf-8') as f:
        # Stream the file line by line instead of loading it all with
        # readlines(); each line is dispatched to the pool asynchronously.
        for line in f:
            p.apply_async(main, (line,))
    p.close()  # no more tasks will be submitted
    p.join()   # wait for all workers to finish