I previously probed 2000+ URLs with a simple requests-based script and it ran for two or three hours, which was really unbearable, so I rewrote it with asyncio + aiohttp. The code may not be pretty, but it runs and the results come out fine.
# -*- coding: UTF-8 -*-
import asyncio
import os
import datetime
import aiohttp
import warnings

warnings.filterwarnings("ignore")
# Create the output directory for scan results if it does not exist yet
if not os.path.exists('./scan_result'):
    os.makedirs('./scan_result')
url_result_success = []  # URLs that responded with HTTP 200
tasks = []
def save_result():
    """Write the list of live URLs to a timestamped file under ./scan_result."""
    print(url_result_success)
    now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    filename = './scan_result/URL_alive_{}.txt'.format(now_time)
    with open(filename, mode='w', encoding='utf-8') as fn:
        fn.write('Live URLs found in this scan: ' + str(len(url_result_success)) + '\n')
        fn.writelines(url_result_success)
async def check(url, semaphore):
    """Probe a single URL and record it as alive if it returns HTTP 200."""
    try:
        async with semaphore:  # the shared semaphore caps how many requests run at once
            async with aiohttp.ClientSession(
                    connector=aiohttp.TCPConnector(limit=64, ssl=False)) as session:
                async with session.get(url) as resp:
                    print(url + " " + str(resp.status))
                    if resp.status == 200:
                        url_result_success.append(url + '\n')
                        return await resp.text()
    except (aiohttp.ClientError, asyncio.TimeoutError):
        # Unreachable hosts and timeouts simply mean the URL is not alive
        pass
def run(url_text):
    with open(url_text, 'r') as f:
        urls = f.readlines()
    length = len(urls)
    print("Probing " + str(length) + " URLs for liveness in this scan")
    # One semaphore shared by every task, so at most 300 requests are in flight at once
    semaphore = asyncio.Semaphore(300)
    for i in range(0, length):
        task = asyncio.ensure_future(check("https://" + urls[i].strip(), semaphore))
        tasks.append(task)
    result = loop.run_until_complete(asyncio.gather(*tasks))
    # print(result)
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    run("url.txt")
    save_result()
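
For comparison, here is a compact sketch of the same scan that shares one ClientSession across all requests instead of opening a new session per URL, and drives everything with asyncio.run. It reads the same url.txt (one bare host per line, https:// is prepended automatically); the names probe/scan and the 10-second timeout are my own choices for illustration, not part of the original script.

import asyncio
import aiohttp

async def probe(session, semaphore, url, alive):
    # One request; connection errors and timeouts just mean "not alive"
    async with semaphore:
        try:
            async with session.get(url) as resp:
                if resp.status == 200:
                    alive.append(url)
        except (aiohttp.ClientError, asyncio.TimeoutError):
            pass

async def scan(hosts):
    alive = []
    semaphore = asyncio.Semaphore(300)          # same 300-request cap as above
    timeout = aiohttp.ClientTimeout(total=10)   # give up on slow hosts after 10s
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
        await asyncio.gather(*(probe(session, semaphore, "https://" + h.strip(), alive)
                               for h in hosts))
    return alive

if __name__ == '__main__':
    with open("url.txt", 'r') as f:
        hosts = f.readlines()
    print(asyncio.run(scan(hosts)))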