经常爬取网站会导致 IP 被封，因此会涉及到购买代理 IP 的问题。代理资源需要自己去寻找、购买并申请，下面是申请到代理 IP 之后，如何通过代理 IP 访问网页的详细过程。
- requests 获取并使用代理 ip
import requests
def gei_ip():
    """Fetch a batch of proxy IPs from the proxy vendor's API.

    Returns:
        list[str]: non-empty "host:port" strings on success.
        str: the sentinel '获取失败' on any failure (kept as-is because the
             caller compares against this exact string).
    """
    url = '获取代理ip的网站'  # placeholder -- replace with the real vendor endpoint
    try:
        response = requests.get(url)
    except requests.RequestException:
        # Network errors must not crash the retry loop in the caller.
        return '获取失败'
    if response.status_code != 200:
        return '获取失败'
    print(response.text)
    # The vendor returns a JSON object (starting with '{') to report an
    # error; a successful response is newline-separated plain text.
    # Guard against an empty body before indexing text[0].
    if not response.text or response.text[0] == '{':
        return '获取失败'
    return [x for x in response.text.split('\n') if x]
def use_proxy():
    """Request the target listing page through the first fetched proxy.

    Prints the page HTML on success (returns None); returns '获取失败' when
    no proxy could be fetched, or '请求失败!' when the proxied request fails.
    """
    ips = gei_ip()
    # gei_ip returns the string '获取失败' on failure, which is truthy --
    # only accept a non-empty list of "host:port" entries, otherwise the
    # first character of the error string would be used as a proxy host.
    if not isinstance(ips, list) or not ips:
        return '获取失败'
    proxy = {'https': f'https://{ips[0]}'}
    try:
        response = requests.get('https://cd.fang.anjuke.com/loupan/all/p1', proxies=proxy)
    except requests.RequestException:
        # A dead proxy raises (ProxyError/ConnectTimeout); report as failure.
        return '请求失败!'
    if response.status_code == 200:
        print(response.text)
    else:
        return '请求失败!'
# Retry until a request succeeds. NOTE: f must be re-assigned inside the
# loop -- the original never updated it, so a single failure spun forever.
f = use_proxy()
while f == '获取失败' or f == '请求失败!':
    f = use_proxy()
- selenium 使用代理 ip 访问网页（代理 ip 仍用 requests 获取）:
from selenium import webdriver
from selenium.webdriver import ChromeOptions
import requests
# 用谷歌浏览器获取代理ip 或者用火狐浏览器用代理ip
def get_ip():
    """Fetch a batch of proxy IPs from the mogumiao proxy API.

    Returns:
        list[str]: non-empty "host:port" strings on success.
        str: the sentinel '获取失败' on any failure (callers compare
             against this exact string).
    """
    url='http://piping.mogumiao.com/proxy/api/get_ip_bs?appKey=6226c130427f487385ad7b5235bc603c&count=5&expiryDate=0&format=2&newLine=3'
    try:
        response = requests.get(url)
    except requests.RequestException:
        # Don't let a transient network error crash the script.
        return '获取失败'
    if response.status_code != 200:
        return '获取失败'
    print(response.text)
    # A body starting with '{' is the vendor's JSON error payload; success
    # is newline-separated plain text. Guard the empty-body case before
    # indexing text[0].
    if not response.text or response.text[0] == '{':
        return '获取失败'
    return [x for x in response.text.split('\n') if x]
# Drive Chrome through one of the fetched proxies.
ips = get_ip()
# get_ip returns the (truthy) string '获取失败' on failure -- accept only a
# real, non-empty list so we never index into the error string.
if isinstance(ips, list) and ips:
    options = webdriver.ChromeOptions()
    # Chrome honors only the LAST --proxy-server argument, so set it once
    # (the original added two, and ips[1] could raise IndexError).
    options.add_argument(f'--proxy-server=http://{ips[0]}')
    # Bug fix: the original built options but never passed them to
    # Chrome, so the browser ran without any proxy at all.
    b = webdriver.Chrome(options=options)
    b.get('https://cd.fang.anjuke.com/xinfang/')
else:
    print('获取失败')