数据获取-requests库
requests通用
代码说明与简单案例
# Advantages: simple to use; query parameters do not need manual URL-encoding;
# the same API works on both Python 2 and Python 3.
# Fetch a response. Headers must be passed by keyword: the second positional
# argument of requests.get() is `params`, so requests.get(url, headers) would
# send the header dict as query-string parameters instead of HTTP headers.
response = requests.get(url, headers=headers)
'response的各种属性与方法'
def day4_requests_get():
    """Demonstrate requests.get() and the common response attributes.

    requests.post() is used the same way. The function sends one GET request
    with a query parameter and a browser User-Agent, then reads the body and
    the request/response metadata into local variables. It returns nothing;
    it exists purely as a walkthrough of the API.
    """
    url = 'http://www.baidu.com/'
    params = {
        'wd': '美女',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
    }
    # Send the request and get the response; `params` needs no manual encoding.
    response = requests.get(url=url, headers=headers, params=params)

    # --- Decoding the response body ---
    data = response.content.decode('utf-8')  # .content is bytes, decoded by hand
    data = response.text  # .text is already a str
    # .json is a method and has to be called. It raises a ValueError subclass
    # when the body is not valid JSON, so guard the call here.
    try:
        data = response.json()  # parsed JSON (a dict for a JSON object)
    except ValueError:
        data = None

    # --- Commonly used metadata ---
    # Headers that were actually sent with the request.
    request_headers = response.request.headers
    # Headers returned by the server.
    response_header = response.headers
    # HTTP status code.
    code = response.status_code
    # Cookies attached to the request (note: `_cookies` is a private attribute
    # of the prepared request).
    request_cookie = response.request._cookies
    # Cookies set by the response.
    response_cookie = response.cookies
"""当响应为json时数据解析"""
def day4_requests_json():
    """POST a form to a JSON endpoint and print the parsed response.

    Sends the form field ``query`` to the language-detection URL and prints
    the JSON body after it has been parsed by ``response.json()``.
    """
    # The stray leading space in the original URL literal has been removed.
    url = 'https://fanyi.baidu.com/langdetect'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
    }
    # Kept under its own name so the request form is not overwritten by the
    # parsed response below.
    form_data = {
        'query': "encode",
    }
    response = requests.post(url, headers=headers, data=form_data)

    # Option 1: decode the raw bytes and parse the JSON text yourself:
    #     text = response.content.decode()
    #     payload = json.loads(text)   # str -> dict
    #     print(payload['lan'])
    # Option 2 (used here): let requests parse the body.
    payload = response.json()
    print(payload)
requests 与代理
def day5_requests_proxy():
    """Send a GET request through a free proxy and print the status code."""
    url = 'https://www.baidu.com/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
    }
    # requests selects the proxy by the scheme of the *target* URL. The target
    # here is https, so an 'https' entry is required; with only an 'http'
    # entry this proxy would not be used for the request at all.
    proxy_url = 'http://120.77.249.46:8080'
    free_proxy = {
        'http': proxy_url,
        'https': proxy_url,
    }
    response = requests.get(url, headers=headers, proxies=free_proxy)
    print(response.status_code)
requests 与 SSL
def day5_requests_ssl():
    """Fetch a page with certificate verification disabled and save it.

    Prints the HTTP status code and writes the decoded body to ``baidu.html``.
    """
    # HTTPS normally requires a certificate issued by a trusted third party.
    # Some sites (the original note mentions 12306) use their own certificate.
    # Workaround shown here: tell the client to skip certificate verification.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
    }
    target = 'http://www.icbc.com.cn/icbc/'
    # NOTE(review): the URL is plain http, so verify=False only matters if the
    # request ends up on an https connection (e.g. via a redirect) - confirm
    # whether an https URL was intended.
    page = requests.get(target, headers=request_headers, verify=False)
    print(page.status_code)
    with open('baidu.html', 'w', encoding='utf-8') as out_file:
        html = page.content.decode()
        out_file.write(html)
requests与 Cookie
cookie字符串转字典
cookie 复制后是字符串,但 requests 的 cookies 参数需要的是字典类型,因此需要先把字符串转换为字典。
# Sample cookie string (a copied cookie is one plain string). Note that the
# BAIDUID value itself contains '=' characters.
cookies = (
    'BAIDUID=9E7C2D060E38585B188986F0EF0CA99E:SL=0:NR=10:FG=1;'
    'BIDUPSID=314F6F32BDBB7DC43ED777384251421C;'
)
方法一:正则匹配,手动替换
替换 ;为\n
替换如图:
方法二:函数处理
# Build the cookie dict with an explicit loop.
cookie_dict = {}
# Split on ';' alone and strip each piece, so both 'a=1; b=2' and 'a=1;b=2;'
# are handled.
cookie_list = cookies.split(';')
for cookie in cookie_list:
    # Split on the FIRST '=' only: cookie values may themselves contain '='.
    name, sep, value = cookie.strip().partition('=')
    if sep:  # skip empty fragments, e.g. the one after a trailing ';'
        cookie_dict[name] = value
方法三:字典推导式
# Dict comprehension version. Each fragment is split on the FIRST '=' only
# (cookie values may contain '='), and empty fragments such as the one after a
# trailing ';' are skipped.
cookie_dict = {
    name: value
    for name, sep, value in (
        cookie.strip().partition('=') for cookie in cookies.split(';')
    )
    if sep
}
携带Cookie访问权限信息
def day5_requests_cookies():
    """Request a page that needs login state by sending a cookie.

    The cookie string is sent in the ``Cookie`` request header and the decoded
    response body is written to ``baidu.html``.
    """
    # The stray leading space in the original URL literal has been removed.
    url = 'https://www.baidu.com/my/index'

    # Option 1: pass a dict through the `cookies=` argument (it expects a
    # dict, so the copied cookie string must be converted first):
    #     cookies = 'BAIDUID=...; BIDUPSID=...; PSTM=...'
    #     cookie_dict = {c.split('=', 1)[0]: c.split('=', 1)[1] for c in cookies.split('; ')}
    #     response = requests.get(url, headers=headers, cookies=cookie_dict)

    # Option 2 (used here): put the raw cookie string in the Cookie header.
    # This is the only headers dict; the original defined an unused one first.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
        'Cookie': 'BAIDUID=9E7C2D060E38585B188986F0EF0CA99E:SL=0:NR=10:FG=1; BIDUPSID=314F6F32BDBB7DC43ED777384251421C; PSTM=1631362136;',
    }
    response = requests.get(url, headers=headers)
    with open('baidu.html', 'w', encoding='utf-8') as f:
        f.write(response.content.decode())
自动登录获取Cookie
def day5_auto_login():
    """Log in with a Session and fetch a members-only page.

    A Session keeps cookies between requests (similar to a cookie jar), so the
    cookie set by the login POST is sent automatically with the following GET.
    The login response body is printed and the second page is written to
    ``baidu.html``.
    """
    url = 'https://www.zcbbe.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1'
    login_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
        # 'Host':'bpoyg.zdzhiheng.com.cn:8000',
        # 'Origin':'http://bpoyg.zdzhiheng.com.cn:8000',
        # 'Referer':'http://bpoyg.zdzhiheng.com.cn:8000/login',
    }
    # The password here is masked; do not hard-code real credentials in
    # source files.
    login_form_data = {
        "username": "jml",
        "password": "j*******9",
        "quickforward": 'yes',
        "handlekey": 'ls',
    }
    center_url = 'https://www.zcbbe.com/plugin.php?id=dc_vip&moblie=no&mobile=no'

    # Use the Session as a context manager so its connections are released
    # when the block exits.
    with requests.Session() as session:
        # Log in; the session stores the cookies from the response.
        response = session.post(url=url, headers=login_header, data=login_form_data)
        print(response.content)
        # Fetch the protected page; the stored cookies are sent automatically.
        response = session.get(center_url, headers=login_header)

    with open('baidu.html', 'w', encoding='utf-8') as f:
        f.write(response.content.decode())