Python_各个代理网站/代理设置/代理池使用

本文详细介绍如何使用代理IP防止爬虫被封,包括常见代理网站、代理池搭建及多种Python模块如requests、httpx、aiohttp等的代理设置方法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

一、常见的ip代理网站收集

二、代理池使用

1、崔大开源的代理池
  • 崔大的代理池子代码,直接下载整个代码,然后pip install -r requirements.txt -i https://pypi.douban.com/simple , 然后直接运行run.py即可,然后打开http://localhost:5555/random就可以随机获取代理ip了
    在这里插入图片描述
  • 测试获取代理ip的代码
    import requests
    import logging
    
    
    def main():
        """Fetch a random proxy from the local pool and verify it works.
    
        Pulls one proxy from proxypool's /random endpoint and tries a GET
        through it; on failure retries with a fresh proxy, up to 10 times.
        :return: none
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}
        for retry_times in range(10):
            proxy = requests.get('http://127.0.0.1:5555/random').text.strip()
            print('get random proxy', proxy)
            # map both schemes so an https target is proxied too; the proxy
            # endpoint itself speaks plain HTTP
            proxies = {'http': f'http://{proxy}', 'https': f'http://{proxy}'}
            try:
                html = requests.get('http://httpbin.org/get', proxies=proxies, timeout=5, headers=headers)
                print(html.status_code, html.text)
                break
            except Exception as err:
                # lazy %-style args keep formatting off the hot path
                logging.warning('attempt %d via proxy %s failed: %s', retry_times + 1, proxy, err)
    
    
    if __name__ == '__main__':
        main()
    
    
2、jhao104开源的代理池
  • jhao104代理池代码 ,直接下载整个代码,然后pip install -r requirements.txt -i https://pypi.douban.com/simple , 然后setting.py里面改下DB_CONN = 'redis://@127.0.0.1:6379/0',然后运行python proxyPool.py schedule , python proxyPool.py server 即可,然后点击http://127.0.0.1:5010/get/就可以随机获取代理ip了
    在这里插入图片描述
    在这里插入图片描述

  • 调用代理代码样例

    import requests
    import logging
    
    
    def main():
        """Fetch a random proxy from the jhao104 pool and verify it works.
    
        Pulls one proxy from the pool's /get/ endpoint and tries a GET
        through it; on failure retries with a fresh proxy, up to 10 times.
        :return: none
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}
        for retry_times in range(10):
            data = requests.get('http://127.0.0.1:5010/get/').json()
            # an empty pool answers with an error dict that has no "proxy"
            # key — guard instead of raising KeyError outside the try block
            proxy = data.get("proxy")
            if not proxy:
                logging.warning('proxy pool returned no proxy: %s', data)
                continue
            print('get random proxy', proxy)
            proxies = {'http': f'http://{proxy}'}
            try:
                html = requests.get('http://httpbin.org/get', proxies=proxies, timeout=5, headers=headers)
                print(html.status_code, html.text)
                break
            except Exception as err:
                logging.warning('attempt %d via proxy %s failed: %s', retry_times + 1, proxy, err)
    
    
    if __name__ == '__main__':
        main()
    

三、各个模块设置代理

1、requests设置代理ip
  • 设置代理,或者是SOCKS的代理类型
    import requests
    
    
    proxy = "58.240.220.86:5281"
    # HTTP-type proxy: one mapping per scheme, each pointing at the same host
    proxies = {scheme: f'{scheme}://{proxy}' for scheme in ('https', 'http')}
    socks_proxies = {'https': f'socks5://{proxy}', 'http': f'socks5://{proxy}'}  # SOCKS-type proxy variant
    try:
        response = requests.get('https://httpbin.org/get', proxies=proxies)
        # httpbin echoes the caller's apparent IP under "origin"
        print(response.json()["origin"])
    except requests.exceptions.ConnectionError as e:
        print('Error', e.args)
    
  • 利用sockets设置全局的代理
    import requests
    import socks
    import socket
    
    # Install a process-wide SOCKS5 proxy by monkeypatching socket.socket:
    # every subsequent connection in this process goes through the proxy.
    # NOTE: the port must be an int — PySocks/socket reject a string port
    # at connect time ("an integer is required").
    socks.set_default_proxy(socks.SOCKS5, '58.240.220.86', 5281)
    socket.socket = socks.socksocket
    try:
        response = requests.get('https://httpbin.org/get')
        print(response.json()["origin"])
    except requests.exceptions.ConnectionError as e:
        print('Error', e.args)
    
2、httpx设置代理ip
  • 设置代理
    import httpx
    
    
    proxy = "58.240.220.86:5281"
    # httpx keys its proxies mapping by URL *pattern* ("http://", "https://"),
    # not by bare scheme names as requests does — plain 'http'/'https' keys
    # make httpx.Client raise on construction.
    proxies = {'https://': f'https://{proxy}', 'http://': f'http://{proxy}'}
    with httpx.Client(proxies=proxies) as client:
        response = client.get('https://httpbin.org/get')
        print(response.json()["origin"])
    
3、aiohttp设置代理ip
  • 设置代理
    import asyncio
    import aiohttp
    
    
    proxy = "http://58.240.220.86:5281"
    
    
    async def main():
        """Issue one GET through the proxy and print the response body."""
        session = aiohttp.ClientSession()
        async with session:
            response = await session.get('http://httpbin.org/get', proxy=proxy)
            async with response:
                print(await response.text())
    
    
    asyncio.run(main())
    
4、selenium设置代理ip
  • 以Chrome为例
    from selenium import webdriver
    
    
    chrome_options = webdriver.ChromeOptions()
    # proxy ip+port: 58.240.220.86:53281
    proxy = '58.240.220.86:53281'
    chrome_options.add_argument(f'--proxy-server=http://{proxy}')
    # chrome_options.add_argument(f'--proxy-server=socks5://{proxy}')
    browser = webdriver.Chrome(options=chrome_options)
    try:
        browser.get('http://httpbin.org/get')
        print(browser.page_source)
    finally:
        # quit() ends the whole driver session (including the chromedriver
        # process); close() would only close the window and leak the process.
        browser.quit()
    
5、pyppeteer设置代理ip
  • 设置代理
    import asyncio
    from pyppeteer import launch
    
    
    async def main():
        """Launch a proxied headful Chromium, fetch one page, and print it."""
        proxy = '58.240.220.86:53281'
        browser = await launch({"args": [f'--proxy-server=http://{proxy}'], "headless": False})
        # browser = await launch({"args": [f'--proxy-server=socks5://{proxy}'], "headless": False})
        page = await browser.newPage()
        await page.goto('https://httpbin.org/get')
        print(await page.content())
        await browser.close()  # shut the browser down
    
    
    # asyncio.run replaces the deprecated get_event_loop().run_until_complete
    # pattern and matches the aiohttp example above.
    asyncio.run(main())
    
6、Playwright设置代理ip
  • 设置代理
    from playwright.sync_api import sync_playwright
    
    
    proxy = '58.240.220.86:5281'
    with sync_playwright() as playwright:
        # pass the proxy at launch time; swap the server scheme for SOCKS
        launch_proxy = {'server': f'http://{proxy}'}
        # launch_proxy = {'server': f'socks5://{proxy}'}
        browser = playwright.chromium.launch(headless=False, proxy=launch_proxy)
        page = browser.new_page()
        page.goto('https://httpbin.org/get')
        print(page.content())
        browser.close()
    
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值