urllib之cookie
处理cookie时，首先需要创建相应的handler（HTTPCookieProcessor），再用它构建opener
获取cookie
import http.cookiejar
import urllib.request
# Create a CookieJar that collects the cookies sent back by the server.
cookie = http.cookiejar.CookieJar()
# HTTPCookieProcessor plugs the jar into the opener so that Set-Cookie
# headers on responses are stored in it.
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("https://www.baidu.com")
# Only the response headers matter here; release the connection right away.
response.close()
print(cookie)
for item in cookie:
    # format() instead of "+" so a cookie without a value (None) does not
    # raise TypeError.
    print("{}={}".format(item.name, item.value))
输出结果
BIDUPSID=F1E1866953405375EB1EA5AE50CB1B35
PSTM=1563535868
BD_NOT_HTTPS=1
保存cookie到text文件
import urllib.request
from http import cookiejar
def save_cookie(url, file_name):
    """Open ``url`` and write the cookies it sets to ``file_name``.

    Args:
        url: URL string or ``urllib.request.Request`` object to open.
        file_name: Path of the cookie file to write. The file uses the
            libwww-perl (LWP) ``Set-Cookie3`` format.
    """
    # To write the Mozilla/Netscape cookies.txt format instead, use
    # cookiejar.MozillaCookieJar(file_name).
    cookie = cookiejar.LWPCookieJar(file_name)
    handler = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handler)
    # The body is not needed; the context manager closes the response so
    # the underlying connection is not leaked.
    with opener.open(url):
        pass
    # Called with its default arguments, so session (discard) cookies and
    # expired cookies are not written to the file.
    cookie.save()
if __name__ == '__main__':
    # Example run: fetch the page and store its cookies in LWP format.
    url = "https://www.baidu.com"
    file_name = "cookie_LWP.text"
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
    }
    req = urllib.request.Request(url,headers=headers)
    # save_cookie accepts a Request object in place of a plain URL string.
    save_cookie(req,file_name)
Mozilla格式
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
.baidu.com TRUE / FALSE 3710949608 BAIDUID 182A5E3788874CC39983E8533C99B495:FG=1
.baidu.com TRUE / FALSE 3710949608 BIDUPSID 182A5E3788874CC39983E8533C99B495
.baidu.com TRUE / FALSE 3710949608 PSTM 1563465961
LWP格式
#LWP-Cookies-2.0
Set-Cookie3: BAIDUID="3673AD9D161DE3CD2E5C7CE30F2C1DBC:FG=1"; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2087-08-05 19:20:47Z"; version=0
Set-Cookie3: BIDUPSID=3673AD9D161DE3CD2E5C7CE30F2C1DBC; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2087-08-05 19:20:47Z"; version=0
Set-Cookie3: PSTM=1563465999; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2087-08-05 19:20:47Z"; version=0
读取cookie
from http import cookiejar
import urllib.request
def read_LWP_cookie(file_name, url="http://www.baidu.com"):
    """Load cookies from an LWP-format file, open ``url`` with them, and
    print the response body.

    Args:
        file_name: Path of an LWP (``Set-Cookie3``) cookie file.
        url: URL string or ``urllib.request.Request`` to open with the
            loaded cookies. Defaults to the page the cookies were
            originally saved from.

    Raises:
        http.cookiejar.LoadError: If ``file_name`` is not a valid LWP
            cookie file.
        OSError: If the cookie file cannot be read or the URL cannot be
            opened.
    """
    cookie = cookiejar.LWPCookieJar()
    # load() reads the local cookie file; the two flags also keep session
    # (discard) cookies and cookies that have already expired.
    cookie.load(file_name, ignore_discard=True, ignore_expires=True)
    handler = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handler)
    # The context manager closes the response once the body has been read.
    with opener.open(url) as response:
        body = response.read()
    print(body.decode("utf-8"))
if __name__ == '__main__':
    # Example run: reuse the cookie file written by the save example.
    file_name = "cookie_LWP.text"
    read_LWP_cookie(file_name)