免费代理网站:
使用了几个代理不太管用,最后使用这个爬到了:
proxy_addr = "163.125.251.172:8088"
代码段如下——用这种方式爬百度似乎拿不到正文内容(返回的 data 长度很短),而 CSDN 博客首页可以正常爬取。
import urllib.request
import urllib.error
def user_proxy(url, proxy_addr):
    """Fetch *url* through the proxy at *proxy_addr* and return the page text.

    Args:
        url: Address of the page to download.
        proxy_addr: Proxy endpoint given as ``"host:port"``.

    Returns:
        The response body decoded as UTF-8; byte sequences that are not
        valid UTF-8 are dropped.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError``, a
            subclass, for an HTTP error status).

    Side effect: the proxy-aware opener is installed as the process-wide
    default, so later ``urllib.request.urlopen`` calls use the same proxy.
    """
    # Register the proxy for both schemes: a handler keyed only on "http"
    # is never consulted for https:// URLs, so those would skip the proxy.
    proxy = urllib.request.ProxyHandler(
        {"http": proxy_addr, "https": proxy_addr}
    )
    opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
    # Install as the global default opener.
    urllib.request.install_opener(opener)
    # Close the response deterministically instead of leaving it to the GC.
    with opener.open(url) as response:
        return response.read().decode("utf-8", "ignore")
proxy_addr = "163.125.251.172:8088"
# Baidu's home page ("http://baidu.com/") was tried first, but fetching it
# this way reportedly yields only a very short body, so the CSDN blog home
# page is used as the target instead.
url = "https://blog.csdn.net/"
try:
    data = user_proxy(url=url, proxy_addr=proxy_addr)
except urllib.error.URLError as e:
    # HTTPError carries both a status code and a reason; a plain URLError
    # only has a reason, hence the attribute checks.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
else:
    # Only reached when the fetch succeeded; kept out of the try body so
    # the handler guards nothing but the network call.
    print(data)
    print("--爬取成功")