PS: External sites are reachable normally while the proxy client runs in PAC mode.
requests
import requests
# route traffic through the local SSR proxy
proxies = {'http': 'http://127.0.0.1:1080', 'https': 'http://127.0.0.1:1080'}
r = requests.get('https://www.google.com/', proxies=proxies)
print(r.status_code)
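If the local port only speaks SOCKS5 (as an SSR local listener often does) rather than HTTP, requests can also talk to it directly. A minimal sketch, assuming the optional SOCKS support is installed (pip install requests[socks]):

import requests
# socks5h:// also resolves DNS through the proxy; plain socks5:// resolves DNS locally
proxies = {'http': 'socks5h://127.0.0.1:1080', 'https': 'socks5h://127.0.0.1:1080'}
r = requests.get('https://www.google.com/', proxies=proxies)
print(r.status_code)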
scrapy
settings.py
PROXY_LIST = [
    {'ip_port': '127.0.0.1:1080'},  # test ip
]
DOWNLOADER_MIDDLEWARES = {
    # register the proxy middleware
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,  # disable the built-in middleware, use the custom one
    'project_name.middlewares.RandomProxy': 544,  # replace project_name with your project's name
}
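The middleware below also handles proxies that require a username and password: such an entry just adds a user_passwd key. A sketch of what the list could look like (the host, port and credentials are placeholders):

PROXY_LIST = [
    {'ip_port': '127.0.0.1:1080'},  # local SSR, no authentication
    {'ip_port': '1.2.3.4:8888', 'user_passwd': 'username:password'},  # paid proxy with authentication
]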
middlewares.py
from scrapy import signals
import random
import base64
from project_name.settings import PROXY_LIST  # replace project_name with your project's name
from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware


class RandomProxy(object):
    def process_request(self, request, spider):
        # pick a random proxy from the list
        proxy = random.choice(PROXY_LIST)
        # check whether this is a paid proxy that needs authentication
        if 'user_passwd' in proxy:
            # base64-encode the credentials (the string must be converted to bytes first)
            b64_data = base64.b64encode(proxy['user_passwd'].encode())
            # set the Proxy-Authorization header: auth scheme + encoded credentials
            request.headers['Proxy-Authorization'] = 'Basic ' + b64_data.decode()
            # set the proxy (meta['proxy'] expects a full URL, hence the scheme prefix)
            request.meta['proxy'] = 'http://' + proxy['ip_port']
        else:
            # free proxies need no authentication
            request.meta['proxy'] = 'http://' + proxy['ip_port']
Once this is configured, scrapy.Request() can reach external sites through the proxy.
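For example, a minimal spider sketch (the spider name and target URL are placeholders); every request it yields passes through RandomProxy before being downloaded:

import scrapy

class ProxyTestSpider(scrapy.Spider):
    name = 'proxy_test'

    def start_requests(self):
        # each Request gets its proxy set in RandomProxy.process_request
        yield scrapy.Request('https://www.google.com/', callback=self.parse)

    def parse(self, response):
        self.logger.info('status: %s', response.status)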