1. res.text and res.content
import requests
res = requests.get('http://www.baidu.com/')
print(type(res.text))
print(res.text)  # automatic decoding; the guessed encoding is sometimes wrong
print(type(res.content))
print(res.content.decode('utf-8'))  # manual decoding: res.content is bytes, decode it yourself with decode('utf-8')
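
When the automatic decoding in res.text is wrong, one common fix is to switch the response to the encoding detected from the body; res.apparent_encoding is a standard attribute of the Response object, so this is a minimal sketch of that approach:

import requests

res = requests.get('http://www.baidu.com/')
print(res.encoding)  # encoding taken from the HTTP headers (may be wrong)
res.encoding = res.apparent_encoding  # switch to the encoding detected from the body
print(res.text)  # now decoded with the corrected encoding
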
import requests
res = requests.get('http://www.baidu.com/')
print(res.url)  # print the requested URL
print(res.encoding)  # print the encoding of the response
print(res.status_code)  # print the HTTP status code; 200 means success

params = {
    'wd': '中国'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6726.400 QQBrowser/10.2.2265.400'
}
res = requests.get('http://www.baidu.com/s', params=params, headers=headers)  # requests.get() accepts the url plus params and headers
with open('baidu.html', 'w', encoding='utf-8') as f:  # open the file with encoding='utf-8'
    f.write(res.content.decode('utf-8'))  # write() expects str; res.content is bytes, so decode it first
print(res.url)  # the final URL contains the url-encoded query string
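
A minimal variant of the same GET flow that checks the response status before writing the file; raise_for_status() is part of the standard requests API, and the output file name here is just an example:

import requests

params = {'wd': '中国'}
headers = {'User-Agent': 'Mozilla/5.0'}
res = requests.get('http://www.baidu.com/s', params=params, headers=headers)
res.raise_for_status()  # raises requests.HTTPError for 4xx/5xx responses
with open('search_result.html', 'w', encoding='utf-8') as f:
    f.write(res.content.decode('utf-8'))  # bytes -> str before writing
print(res.status_code, res.url)
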
2. The requests.post method (you generally need to pass data)

import requests
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
data = {
    'first': 'true',
    'pn': 1,
    'kd': 'python'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6726.400 QQBrowser/10.2.2265.400',
    'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
}
res = requests.post(url,data=data, headers=headers)
print(res.text)
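
If the endpoint expects a JSON body instead of form data, requests can serialize the payload itself through the json= keyword (a standard requests feature). A minimal sketch against httpbin, which simply echoes the request back:

import requests

payload = {'first': 'true', 'pn': 1, 'kd': 'python'}
res = requests.post('http://httpbin.org/post', json=payload)  # sent as a JSON request body
print(res.json()['json'])  # httpbin echoes the parsed JSON payload
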
3. Proxies
import requests
url = 'http://httpbin.org/ip'
res = requests.get(url)
print(res.text)
proxy = {
    'http': '116.213.64.101:53281'  # put the proxy address into a dict
}
res = requests.get(url, proxies=proxy)  # pass the dict to the proxies parameter
print(res.text)
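
A slightly fuller sketch: proxies can be given per scheme, and adding a timeout plus exception handling keeps a dead proxy from hanging the script (the proxy address below is only a placeholder):

import requests

proxies = {
    'http': 'http://116.213.64.101:53281',   # placeholder proxy address
    'https': 'http://116.213.64.101:53281',
}
try:
    res = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=5)
    print(res.text)  # should now show the proxy's IP
except requests.exceptions.RequestException as e:
    print('proxy request failed:', e)
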
4. SSL certificate verification
res = requests.get("http://www.12306.cn/",verify=false) 爬起没有认证的网址时 需要指定verify=false verify默认是true