前些天爬虫换了一个蘑菇代理,使用起来还是蛮简单的,记录一下。
蘑菇隧道代理介绍
隧道代理使用
scrapy中间件的用法
# Tunnel-proxy server for the Mogu proxy service.
# The old form (host:port with no scheme) no longer works: it raises
# twisted.web.error.SchemeNotSupported: Unsupported scheme: b''
# because the 'http://' scheme prefix is missing.
# proxyServer = "transfer.mogumiao.com:9001"
# Using an 'https://' prefix instead raises an SSL error, so 'http://' is required.
proxyServer = "http://transfer.mogumiao.com:9001"
# appKey is the key of your proxy order (the token after "Basic " is the
# Basic-auth credential supplied by the provider).
proxyAuth = "Basic " + "ZzBrb2pmdDUydTY4cnp2aDp4Nk4wbzFxOHRBZXhEV3Ez" # this long string is your order key
class MoGuProxyMiddleWare(object):
    """Scrapy downloader middleware that routes requests through the Mogu tunnel proxy."""

    def process_request(self, request, spider):
        """Attach the tunnel proxy and its Basic-auth credential to each outgoing request."""
        request.meta["proxy"] = proxyServer
        request.headers["Authorization"] = proxyAuth

    def process_response(self, request, response, spider):
        """Reschedule the request when the response is neither OK nor a redirect.

        Returns the original response for 200/301/302; otherwise returns the
        request so Scrapy re-fetches it through the proxy.
        """
        if response.status not in (200, 301, 302):
            # Re-attach proxy credentials for the retry attempt.
            request.meta["proxy"] = proxyServer
            request.headers["Authorization"] = proxyAuth
            # Without dont_filter the scheduler's dupefilter would discard this
            # already-seen URL and the retry would never actually be fetched.
            request.dont_filter = True
            return request
        return response
requests用法
import requests
from urllib.parse import urljoin

# Tunnel-proxy app key for the Mogu proxy order (Basic-auth credential).
appKey = "T1BVYVVNe*******eTQ1Mmdq"
# Mogu tunnel proxy server address.
ip_port = 'transfer.mogumiao.com:9001'
# Target URL to crawl.
url = 'https://ip.cn'
proxy = {"http": "http://" + ip_port, "https": "https://" + ip_port}
headers = {"Proxy-Authorization": 'Basic ' + appKey}
# verify=False because the tunnel intercepts TLS, so certificate checks fail
# (this is insecure; acceptable only for this demo).
# allow_redirects=False so each redirect hop is re-sent with the proxy auth header.
r = requests.get(url, headers=headers, proxies=proxy, verify=False, allow_redirects=False)
print(r.status_code)
print(r.content)
if r.status_code in (301, 302):
    loc = r.headers['Location']
    # urljoin handles both relative and absolute Location headers; the original
    # "https://ip.cn" + loc produced a malformed URL whenever loc was absolute.
    url_f = urljoin(url, loc)
    print(loc)
    r = requests.get(url_f, headers=headers, proxies=proxy, verify=False, allow_redirects=False)
    print(r.status_code)
    print(r.text)