Requests库
HTTP请求库,基于urllib
简单演示
# Demo: issue a GET request with the requests library and inspect the response.
import requests

# Fetch the Baidu homepage; the call returns a Response object.
response = requests.get("http://www.baidu.com")
# The response is an instance of requests.models.Response.
print(type(response))
# HTTP status code returned by the server.
print(response.status_code)
# Cookies the server set on this response.
print(response.cookies)
# response.text is the decoded body, a str.
print(type(response.text))
# Full response body.
print(response.text)
请求
基本GET请求
# Minimal GET request: fetch a page and print its body.
import requests

resp = requests.get("http://www.baidu.com")
print(resp.text)
带参数GET请求
# GET request with query-string parameters, passed via `params=`.
import requests

payload = {"name": "germey", "age": "22"}
resp = requests.get("http://httpbin.org/get", params=payload)
print(resp.text)
添加headers
# Some sites reject even a GET request that carries no browser-like headers.
import requests

# Request Zhihu without headers: the server sees no User-Agent, treats the
# request as a non-browser (malicious) client, and refuses to serve the page.
response = requests.get("https://www.zhihu.com/explore")
print(response.text)
# Sample rejected output:
# <html><body><h1>500 Server Error</h1>
# An internal server error occured.
# </body></html>

# Add a User-Agent header so the request identifies itself as a real browser.
headers = {"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0"}
response = requests.get("https://www.zhihu.com/explore", headers=headers)
# With the header present the page is returned normally.
print(response.text)
基本POST请求
# Basic POST: form data goes in the request body via `data=`;
# httpbin echoes it back, decoded here with .json().
import requests

form = {"name": "aCandy", "age": "24"}
ua = {"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0"}
response = requests.post("http://httpbin.org/post", data=form, headers=ua)
print(response.json())
- 响应
- response属性:如 status_code、cookies、text 等(见上方示例)
BeautifulSoup库
网页解析库
# Parse an HTML document with BeautifulSoup using the lxml parser backend.
from bs4 import BeautifulSoup
# NOTE(review): `html` is not defined in this snippet — presumably the page
# source (str or bytes) fetched earlier, e.g. response.text; confirm caller.
soup = BeautifulSoup(html,'lxml')