第4讲 urllib Cookie相关操作,requests简单使用
整体课程知识点查看 :https://blog.csdn.net/j1451284189/article/details/128713764
本讲总结:
urllib添加cookie相关操作
urllib 的UrlError简单介绍
requests第三方库简单使用
一、添加Cookie登录
缺点:需要自己在浏览器上登录获取cookie
#总结:在headers加入cookie即可,和User-Agent相同
def day4_cookies():
    """Fetch a login-required Baidu page by replaying a browser Cookie header.

    Drawback (per the lesson notes): the Cookie value must be copied by hand
    from an already-logged-in browser session. It is sent exactly like any
    other header (same mechanism as User-Agent).
    """
    url = 'https://www.baidu.com/my/index'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
        # Fix: the original literal was unterminated (no closing quote),
        # which is a syntax error.
        'Cookie': 'BAIDUID=183FF5BBC1048B6A7AFE9AD123052C0D:SL=0:NR=10:FG=1;',
    }
    request = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(request)
    data = response.read().decode('utf-8')
    # Persist the page so the logged-in state can be inspected offline.
    with open('baidu.html', 'w', encoding='utf-8') as f:
        f.write(data)
二、代码自动登录
原理:使用代码登录,登录成功后获取Cookie,处理器自动携带Cookie访问需要权限的网址
缺点:需找到登录网址,和其他登录需要的参数,短信验证码等
# Summary — key lines excerpted from day4_auto_login; not runnable standalone
# (login_form_data is defined inside the function, not at module level).
login_byte = urllib.parse.urlencode(login_form_data).encode('utf-8') # POST bodies must be bytes
# Handler that captures Set-Cookie from responses and replays cookies on later requests
cook_jar = cookiejar.CookieJar()
cook_handler = urllib.request.HTTPCookieProcessor(cook_jar)
def day4_auto_login():
    """Log in programmatically, then reuse the captured Cookie for a protected page.

    Principle: a CookieJar-backed opener performs the login POST; the jar
    stores any Set-Cookie headers from the response, and the same opener then
    sends them automatically when requesting the member-only profile page.
    Drawback: requires locating the real login endpoint and its parameters
    (captcha / SMS codes etc.).
    """
    # Login endpoint (found via browser dev tools).
    login_url = 'http://bpoyg.zdzhiheng.com.cn:8000/prod-api/login'
    login_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0'
    }
    login_form_data = {
        "username": "10***28",
        "password": "W!j***13#",
        "code": "d74y",
    }
    # POST bodies must be bytes, so urlencode then encode.
    login_byte = urllib.parse.urlencode(login_form_data).encode('utf-8')
    # Handler that stores cookies from responses and replays them on requests.
    cook_jar = cookiejar.CookieJar()
    cook_handler = urllib.request.HTTPCookieProcessor(cook_jar)
    opener = urllib.request.build_opener(cook_handler)
    # Fix: the original built the login Request but never sent it, so the
    # CookieJar stayed empty. The login must go through the opener for the
    # jar to capture the session cookie.
    login_request = urllib.request.Request(login_url, headers=login_header, data=login_byte)
    opener.open(login_request)
    # Reuse the cookie-carrying opener for the members-only profile page.
    center_url = 'http://123.com/user/profile'
    center_request = urllib.request.Request(center_url, headers=login_header)
    response = opener.open(center_request)
    data = response.read().decode('utf-8')
    # Persist the result for inspection.
    with open('baidu.html', 'w', encoding='utf-8') as f:
        f.write(data)
三、URLError
# URLError 用于在请求出错时捕获异常并执行对应的处理逻辑(HTTPError 是其子类,需先捕获)
import urllib.error
def day4_urlerr():
    """Demonstrate urllib error handling.

    HTTPError is a subclass of URLError, so the more specific handler must
    come first; the tight timeout makes a URLError (timeout) easy to trigger.
    """
    target = 'http://www.python.org/'
    try:
        reply = urllib.request.urlopen(target, timeout=0.1)
    except urllib.error.HTTPError as exc:
        # The server answered, but with an HTTP error status.
        print(exc.code)
    except urllib.error.URLError as exc:
        # Network-level failure: DNS, refused connection, timeout, ...
        print(exc)
四、requests第三方库
优点:简单易用,url不需要转译;支持Python2,3且使用方法相同
#简单使用,见案例1
response = requests.get(url, headers=headers) #获取响应(headers 必须以关键字参数传入,否则会被当作 params)
#response的各种属性与方法
def day4_requests_get():
    """Demonstrate requests.get (post works the same way).

    Shows the three body-decoding options and the attributes that expose the
    sent/received headers, status code and cookies.
    """
    url = 'http://www.baidu.com/'
    params = {
        'wd': '美女'
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
    }
    # requests url-encodes params itself — no manual urlencode/quote needed.
    response = requests.get(url=url, headers=headers, params=params)
    # Body access options:
    data = response.content.decode('utf-8')  # raw bytes, decoded by hand
    data = response.text                     # str, decoded by requests
    # Fix: json is a method, not an attribute — the original `response.json`
    # merely bound the method object instead of parsing. It must be called
    # as response.json(), and only on a JSON body; Baidu returns HTML here,
    # so the call is shown but left commented out.
    # data = response.json()                 # dict
    # Commonly used attributes:
    request_headers = response.request.headers   # headers actually sent
    response_header = response.headers           # headers received
    code = response.status_code                  # HTTP status code
    # NOTE(review): _cookies is a private attribute of PreparedRequest —
    # works, but prefer public APIs where available.
    request_cookie = response.request._cookies   # cookies sent with the request
    response_cookie = response.cookies           # cookies set by the response
    print(code)
def day4_requests_json():
    """Show how to consume a JSON response with requests."""
    # Fix: the original literal began with a space (' https://...').
    # Recent requests versions strip leading whitespace, but relying on
    # that is fragile — the URL is now clean.
    url = 'https://fanyi.baidu.com/langdetect'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0',
    }
    data = {
        'query': "encode"
    }
    response = requests.post(url, headers=headers, data=data)
    # Manual route: decode the bytes, then json.loads into a dict.
    # data = response.content.decode()
    # data_dict = json.loads(data)
    # print(data_dict['lan'])
    # Shortcut: response.json() parses the JSON body straight to a dict.
    data = response.json()
    print(data)