类型:requests.models.Response
六个属性
r.text:获取网页源码
r.encoding:访问或定制编码方式
r.url:获取请求的url
r.content:响应的字节类型
r.status_code:响应的状态码
r.headers:响应的头信息
requests基本使用
# Basic usage of requests: fetch a page and inspect the six common
# attributes of the returned requests.models.Response object.
import requests

url = 'http://www.baidu.com'
response = requests.get(url)

# 1. r.text -- page source decoded to str. Chinese pages may show
#    mojibake until the correct encoding is set (see step 2).
print(response.text)

# 2. r.encoding -- read or override the charset used to decode r.text.
response.encoding = 'utf-8'
print(response.text)  # Chinese characters now decode correctly

# 3. r.url -- the URL that was actually requested.
print(response.url)
# http://www.baidu.com/

# 4. r.content -- raw response body as bytes (no decoding).
print(response.content)

# 5. r.status_code -- HTTP status code of the response.
print(response.status_code)
# 200

# 6. r.headers -- response headers (case-insensitive mapping).
print(response.headers)
# {'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Connection': 'keep-alive', 'Content-Encoding': 'gzip', 'Content-Type': 'text/html', 'Date': 'Sun, 09 Jul 2023 04:02:26 GMT', 'Last-Modified': 'Mon, 23 Jan 2017 13:28:24 GMT', 'Pragma': 'no-cache', 'Server': 'bfe/1.0.8.18', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Transfer-Encoding': 'chunked'}
get请求
requests.get(url, params=None, **kwargs)【url:地址;params:查询参数(字典);kwargs:headers、timeout 等其他可选参数】
路径后面的参数使用params传递【data】
注意:出现“网络不给力”是遭遇了反爬虫手段,需要自己手动添加cookie。现在的cookie都是动态的,需要我们自己去浏览器的请求头中重新复制下来。
【爬取百度搜索湖北的页面源码】
# Scrape the Baidu search-results page for the query "湖北".
# Baidu rejects bare clients, so a browser User-Agent plus a Cookie
# copied from a real browser request are sent; the cookie is dynamic
# and must be re-captured from the browser when it expires.
import requests

url = 'http://www.baidu.com/s?'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Cookie': 'BIDUPSID=366E9AA11A68D9963E260EA3319F216C; PSTM=1675775907; BAIDUID=366E9AA11A68D9963D810AE9911912AB:FG=1; sugstore=0; BAIDUID_BFESS=366E9AA11A68D9963D810AE9911912AB:FG=1; ZFY=1ViXU6l:AiJ7u:AOb:BhRUMjT5CrL:Bfd4BNL:BQRa:A568y8:C; __bid_n=188d431969613287231e7b; BDUSS=nRVRWp0QzNtQngtUS10amRLUVc3ZlVhM29UeUJ-MGZtUVJ0WFFGd3kzRGNIYjFrSVFBQUFBJCQAAAAAAQAAAAEAAAACfmV4x-XT8MSrsLK7tgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANyQlWTckJVkY; BDUSS_BFESS=nRVRWp0QzNtQngtUS10amRLUVc3ZlVhM29UeUJ-MGZtUVJ0WFFGd3kzRGNIYjFrSVFBQUFBJCQAAAAAAQAAAAEAAAACfmV4x-XT8MSrsLK7tgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANyQlWTckJVkY; BD_UPN=12314753; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BA_HECTOR=a4018ga401258l218g2ka18o1iaj1261p; B64_BOT=1; BD_HOME=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; BD_CK_SAM=1; PSINO=3; delPer=0; H_PS_PSSID=36561_38642_38831_39026_39024_38860_38957_38954_38915_38820_38989_38638_26350_38948; H_PS_645EC=70e8ZFMek6L2%2BwEMnoBjnYmBfsEC3q2UlTHM1iL7%2BJy%2FpZF0g3zmZPKdZkU; baikeVisitId=fcf68cbc-15d3-4e47-8cf1-f03f2f74fa51; COOKIE_SESSION=8_1_8_9_8_3_1_0_7_4_0_1_10018868_0_0_0_1688779790_1688779781_1688876617%7C9%230_1_1688779778%7C1'
}
# Query-string parameters; requests URL-encodes them and appends them
# to the URL because they are passed via params=.
data = {
    'wd': '湖北'
}
# timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, params=data, headers=headers, timeout=10)
response.encoding = 'utf-8'  # ensure Chinese text decodes correctly
content = response.text
print(content)
post请求
response = requests.post(url, data=None, json=None, **kwargs)
url:和get一样
data:相当于get的params
kwargs:字典
【百度翻译‘eye’】
# POST to Baidu Translate's suggestion endpoint to look up 'eye'.
# Only a User-Agent is needed here; no cookie is required.
import requests

url = 'https://fanyi.baidu.com/sug'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}
# Form payload; passed via data= so it is sent form-urlencoded.
data = {
    'kw': 'eye'
}
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# The endpoint returns JSON; Response.json() decodes it directly,
# replacing the manual json.loads(response.text) round-trip.
obj = response.json()
print(obj)
代理
依然使用快代理【我的最后还是显示本地真实ip,可能是没付钱吧】
# Send the search through an HTTP proxy and save the result page.
# NOTE(review): free/unpaid proxies are frequently dead or ignored,
# which is why the saved page may still show the real local IP.
import requests

url = 'http://www.baidu.com/s?'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Cookie': 'BIDUPSID=366E9AA11A68D9963E260EA3319F216C; PSTM=1675775907; BAIDUID=366E9AA11A68D9963D810AE9911912AB:FG=1; sugstore=0; BAIDUID_BFESS=366E9AA11A68D9963D810AE9911912AB:FG=1; ZFY=1ViXU6l:AiJ7u:AOb:BhRUMjT5CrL:Bfd4BNL:BQRa:A568y8:C; __bid_n=188d431969613287231e7b; BDUSS=nRVRWp0QzNtQngtUS10amRLUVc3ZlVhM29UeUJ-MGZtUVJ0WFFGd3kzRGNIYjFrSVFBQUFBJCQAAAAAAQAAAAEAAAACfmV4x-XT8MSrsLK7tgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANyQlWTckJVkY; BDUSS_BFESS=nRVRWp0QzNtQngtUS10amRLUVc3ZlVhM29UeUJ-MGZtUVJ0WFFGd3kzRGNIYjFrSVFBQUFBJCQAAAAAAQAAAAEAAAACfmV4x-XT8MSrsLK7tgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANyQlWTckJVkY; BD_UPN=12314753; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BA_HECTOR=a4018ga401258l218g2ka18o1iaj1261p; B64_BOT=1; BD_HOME=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; BD_CK_SAM=1; PSINO=3; delPer=0; H_PS_PSSID=36561_38642_38831_39026_39024_38860_38957_38954_38915_38820_38989_38638_26350_38948; H_PS_645EC=70e8ZFMek6L2%2BwEMnoBjnYmBfsEC3q2UlTHM1iL7%2BJy%2FpZF0g3zmZPKdZkU; baikeVisitId=fcf68cbc-15d3-4e47-8cf1-f03f2f74fa51; COOKIE_SESSION=8_1_8_9_8_3_1_0_7_4_0_1_10018868_0_0_0_1688779790_1688779781_1688876617%7C9%230_1_1688779778%7C1'
}
data = {
    'wd': 'ip'
}
# proxies maps URL scheme -> proxy address. Only http:// requests use
# this proxy; add an 'https' key to also proxy https:// URLs.
proxy = {
    'http': '36.134.91.82:8888'
}
# timeout keeps the script from hanging on an unresponsive proxy.
response = requests.get(url=url, params=data, headers=headers, proxies=proxy, timeout=10)
response.encoding = 'utf-8'
content = response.text
with open('daili.html', 'w', encoding='utf-8') as fp:
    fp.write(content)
requests的文件保存[csv]
后缀为csv, 写入时要用单引号包裹,以逗号分隔
fp.write(f'{v1}, {v2}, {v3}, {v4}, {v5}\n')  # f-string的花括号里必须写变量名,不能留空
如果字段里带有换行符,可以先用 str.strip() 去掉首尾的空白字符(包括换行符)再写入