基于python的爬虫，批量爬取漏洞poc

最新推荐文章于 2023-12-08 18:14:01 发布

Thegentlest

最新推荐文章于 2023-12-08 18:14:01 发布

阅读量1.7k

点赞数 2

分类专栏：网络安全 文章标签：python 爬虫 网络安全

本文链接：https://blog.csdn.net/Thegentlest/article/details/126821599

版权

网络安全专栏收录该内容

7 篇文章 0 订阅

订阅专栏

import time

import requests
import threading
from queue import Queue
from bs4 import BeautifulSoup as bs


#Poc页面 https://www.seebug.org/vuldb/vulnerabilities?has_poc=true&page=2

#漏洞详情页面 https://www.seebug.org/vuldb/ssvid-97587

#下载Poc GET /vuldb/downloadPoc/97967


class SeeBugPoc(threading.Thread):
    """Worker thread that downloads Seebug PoC files for queued listing pages.

    Each worker repeatedly takes one listing-page URL from the shared queue,
    finds every ``<a class="vul-title">`` link on that page and saves the
    matching PoC download to ``pocs/<title>.py``.

    The HTTP headers are read from the module-level name ``headers``, which
    must be defined before the thread is started.
    """

    # Seconds to wait for the server before giving up on a request; without a
    # timeout a stalled connection would block the worker forever.
    REQUEST_TIMEOUT = 30

    # Characters that are path separators or are rejected in file names on
    # common file systems.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, queue):
        """Store the shared queue of listing-page URLs to process."""
        threading.Thread.__init__(self)
        self._queue = queue

    @staticmethod
    def _safe_filename(name):
        """Return *name* with characters unsuitable for a file name replaced by ``_``."""
        unsafe = SeeBugPoc._UNSAFE_FILENAME_CHARS
        cleaned = "".join(
            "_" if ch in unsafe or ord(ch) < 32 else ch for ch in name
        ).strip(" .")
        return cleaned or "unnamed"

    def run(self):
        """Process listing pages until the shared queue is drained."""
        from queue import Empty

        while True:
            # Checking empty() and then calling get_nowait() is racy when
            # several workers share the queue, so rely on the exception.
            try:
                url = self._queue.get_nowait()
            except Empty:
                break
            try:
                self.spider(url)
            except (requests.RequestException, OSError) as exc:
                # One bad page must not stop this worker from handling the rest.
                print(f"{url} 爬取失败: {exc}")

    def spider(self, url):
        """Download the PoC of every vulnerability linked from listing page *url*.

        Raises:
            requests.RequestException: if the listing page cannot be fetched.
            OSError: if the output directory or a PoC file cannot be written.
        """
        import os

        r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        if r.status_code != 200:
            print(f"{url} 返回状态码 {r.status_code}，已跳过")
            return

        # Safe to call from several workers at once thanks to exist_ok.
        os.makedirs("pocs", exist_ok=True)

        soup = bs(r.content, 'lxml')
        vulas = soup.find_all(name='a', attrs={'class': 'vul-title'})
        for vula in vulas:
            href = vula.get('href')
            if not href:
                continue
            # The detail link ends in "ssvid-<number>"; keep the number.
            ssvid = href.split('-')[-1]
            vulname = vula.get('title') or ssvid
            download_url = f"https://www.seebug.org/vuldb/downloadPoc/{ssvid}"
            try:
                download_res = requests.get(
                    download_url, headers=headers, timeout=self.REQUEST_TIMEOUT
                )
            except requests.RequestException as exc:
                print(f"{vulname}poc下载失败: {exc}")
                continue
            finally:
                # Pause between download requests whether or not this one succeeded.
                time.sleep(2)

            try:
                poc_text = download_res.content.decode('utf-8')
            except UnicodeDecodeError as exc:
                # Report the failure instead of leaving an empty file behind
                # and claiming success.
                print(f"{vulname}poc解码失败: {exc}")
                continue

            poc_path = os.path.join("pocs", f"{self._safe_filename(vulname)}.py")
            with open(poc_path, "a+", encoding='utf-8') as f:
                f.write(poc_text)
            print(f"{vulname}poc保存成功")

if __name__ == '__main__':
    # Request headers sent with every request; SeeBugPoc.spider reads this
    # module-level name, so it must be defined before any thread starts.
    headers = {
        'Host': 'www.seebug.org',
        'Cookie': '',  # put your own Seebug session cookie here
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'https://www.seebug.org/vuldb/ssvid-99364',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Te': 'trailers',
        'Connection': 'close',
    }
    queue = Queue()
    # First and last listing page to crawl; the range end is exclusive, so
    # this covers pages 21 through 25.
    for page in range(21, 26):
        queue.put(f"https://www.seebug.org/vuldb/vulnerabilities?has_poc=true&page={page}")

    # Number of worker threads
    thread_count = 4
    threads = [SeeBugPoc(queue) for _ in range(thread_count)]

    # Start every worker before joining any of them. Joining inside the start
    # loop would make each thread finish before the next one begins, so the
    # crawl would run sequentially.
    for t in threads:
        t.start()
    for t in threads:
        t.join()

本文采用爬虫技术，以多线程方式批量爬取知道创宇 Seebug 漏洞平台的 PoC。