Setting Up Proxies for Web Scraping

1. Setting a proxy in requests

import random

import requests

# Test URLs
url = 'https://www.baidu.com'
url2 = 'https://httpbin.org/get'
# Proxy pool
proxy_pool = ['138.201.223.250:31288', '196.13.208.23:8080', '91.197.132.99:53281']

def get_content(url, proxy_pool):
    # Pick a proxy at random from the pool
    # proxy = '138.201.223.250:31288'  # or pin a single proxy
    proxy = random.choice(proxy_pool)
    print('Using proxy: %s' % proxy)
    # Route both HTTP and HTTPS traffic through the same HTTP proxy
    proxies = {
        'https': 'http://' + proxy,
        'http': 'http://' + proxy
    }

    try:
        res = requests.get(url=url, proxies=proxies)
        print(res.status_code)
        print(res.text)
    except requests.exceptions.ConnectionError as e:
        print('Error:', e.args)

get_content(url2, proxy_pool)
# get_content(url, proxy_pool)
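
Free proxies like these go stale quickly. As a minimal sketch (get_content_with_retry and its max_retries parameter are my naming, not from the original post), you can add a request timeout and fall back to another proxy when one fails:

import random

import requests

def get_content_with_retry(url, proxy_pool, max_retries=3):
    # Try up to max_retries different proxies before giving up
    pool = proxy_pool[:]
    random.shuffle(pool)
    for proxy in pool[:max_retries]:
        proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
        try:
            # A short timeout weeds out dead proxies quickly
            res = requests.get(url, proxies=proxies, timeout=5)
            print('Proxy %s -> %s' % (proxy, res.status_code))
            return res.text
        except requests.exceptions.RequestException as e:
            print('Proxy %s failed: %s' % (proxy, e))
    return None

# Usage: print(get_content_with_retry('https://httpbin.org/get', proxy_pool))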

2. Setting a proxy in Selenium

import random
from selenium import webdriver

# Test URLs
url = 'https://www.baidu.com'
url2 = 'http://httpbin.org/get'
# Proxy pool
proxy_pool = ['138.201.223.250:31288', '196.13.208.23:8080', '91.197.132.99:53281']

# Pick a proxy at random from the pool
proxy = random.choice(proxy_pool)

chrome_options = webdriver.ChromeOptions()
# The Chrome flag is --proxy-server (hyphenated), with an http:// scheme
chrome_options.add_argument('--proxy-server=http://%s' % proxy)
bro = webdriver.Chrome(options=chrome_options)
bro.get(url)
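
To confirm the proxy is actually in effect, point the driver at httpbin.org/get, which echoes the requesting IP back. A short continuation of the bro driver above (a sketch; the exact output depends on the httpbin response):

# httpbin reports the caller's IP in the "origin" field;
# it should show the proxy's address, not your own
bro.get(url2)
print(bro.page_source)
bro.quit()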

3. Setting a proxy in PhantomJS

from selenium import webdriver

url = 'http://www.baidu.com'
url2 = 'http://httpbin.org/get'
# PhantomJS takes proxy settings as command-line service args;
# the flags are hyphenated, and valid --proxy-type values are http, socks5, and none
service_args = [
    '--proxy=196.13.208.23:8080',
    '--proxy-type=http'
]
bro = webdriver.PhantomJS(executable_path=r'D:\phantomjs\bin\phantomjs.exe', service_args=service_args)

bro.get(url)

print(bro.page_source)
bro.quit()
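
If the proxy requires a login, PhantomJS also accepts a --proxy-auth flag. A sketch with placeholder credentials (user:pass below is hypothetical, not from the original post):

from selenium import webdriver

service_args = [
    '--proxy=196.13.208.23:8080',
    '--proxy-type=http',
    '--proxy-auth=user:pass'  # placeholder credentials, not a real account
]
bro = webdriver.PhantomJS(executable_path=r'D:\phantomjs\bin\phantomjs.exe',
                          service_args=service_args)
bro.get('http://httpbin.org/get')
print(bro.page_source)
bro.quit()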

To be continued!

Reposted from: https://www.cnblogs.com/knighterrant/p/10798366.html
