# "The website is the API…"
# Requests documentation: http://cn.python-requests.org/zh_CN/latest/
# General-purpose crawler code framework
import requests
# Define the general-purpose crawler code framework
def getHTMLText(url, proxies=None, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        proxies: Optional mapping of URL scheme to proxy URL, handed to
            ``requests`` unchanged, e.g.
            ``{'https': 'https://10.10.10.1:4321'}``. With ``None`` no
            explicit proxy mapping is sent.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded response text, or the string "产生异常" when the request
        fails or the server answers with an error status.
    """
    try:
        r = requests.request('GET', url, proxies=proxies, timeout=timeout)
        # Raises requests.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        # Decode using the encoding detected from the body content instead
        # of the one guessed from the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures map to the sentinel; KeyboardInterrupt
        # and programming errors are allowed to propagate.
        return "产生异常"
# main
if __name__ == '__main__':
    # Demo run: request the Baidu home page and print whatever comes back.
    print(getHTMLText("http://www.baidu.com"))