1、获取cookies
直接请求无法获取到cookies,需要先设置浏览器请求头(User-Agent)。
2、加上浏览器请求头后,即可获取到cookies。此时的cookies是一个RequestsCookieJar对象。
3、将cookie转换为字典格式以便查看。
由于amazon的反爬比较强,这里用baidu.com作为案例进行演示,
并将cookie以字典的形式保存为json文件到本地。
import json

import requests
from requests.cookies import RequestsCookieJar

# Fetch the page with a browser-like User-Agent, print the cookies the server
# returns, and save them to cookies_17.json as a plain dict.
url = 'https://www.baidu.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
}
# A timeout keeps the script from blocking forever if the server never answers.
res = requests.get(url, headers=headers, timeout=10)
print(res.status_code)
print(res.cookies)

# Next, work with the returned cookies.
# Create an empty cookie jar instance (not used further in this snippet).
cookie_jar = RequestsCookieJar()
# Convert the received cookie jar into a dict.
resd = requests.utils.dict_from_cookiejar(res.cookies)
print('resd', resd)  # e.g. {'BAIDUID': '0620ED9D1B0DF2015903699526F96D09:FG=1', 'BIDUPSID': ...}
# Convert the dict back into a cookie jar to inspect the cookie key/value pairs.
print('dict_resd', requests.utils.cookiejar_from_dict(resd))
# The context manager closes the file even if json.dump raises.
with open("cookies_17.json", "w") as file:
    json.dump(resd, file)
4、从文件中读取cookie,并在请求中携带该cookie。
import json

import requests
from requests.cookies import RequestsCookieJar

# Load the cookie dict stored in cookies_17.json and send it with a new request.
# The context manager closes the file even if parsing fails.
with open("cookies_17.json", "r") as file:
    cookie_json = json.load(file)
print(cookie_json)
print(type(cookie_json))
print(cookie_json.keys())  # a dict_keys view
print(cookie_json.values())

cookie_jar = RequestsCookieJar()
url = 'https://www.baidu.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
}
# Cookie format: cookie_jar.set(<cookie name>, <cookie value>, domain=<domain>)
# Add every saved cookie, not just the first one; iterating also avoids the
# IndexError that indexing [0] would raise when the saved dict is empty.
for cookie_name, cookie_value in cookie_json.items():
    cookie_jar.set(cookie_name, cookie_value, domain='www.baidu.com')
# Send the cookies with the request; the timeout prevents an indefinite hang.
res = requests.get(url, headers=headers, cookies=cookie_jar, timeout=10)
print(res.status_code)
print(res.text.encode('utf-8', errors='ignore'))
5、使用代理的情况下获取cookie
这里使用的是阿布云(abuyun)代理。
import json

import requests
from requests.cookies import RequestsCookieJar

# Fetch the page through an authenticated HTTP proxy and save the returned
# cookies to cookies_18.json as a plain dict.

# Proxy server
proxyHost = "http-dyn.abuyun.com"
proxyPort = "9020"
# Proxy tunnel credentials.
# NOTE(review): credentials are hard-coded in source; replace them with your
# own and prefer loading them from configuration or the environment.
proxyUser = "HIX9H33N87639FSD"  # replace with your own pass ID
proxyPass = "580D452C7B02267E"  # replace with your own pass key
proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    "host": proxyHost,
    "port": proxyPort,
    "user": proxyUser,
    "pass": proxyPass,
}
# Route both http and https traffic through the same proxy URL.
proxies = {
    "http": proxyMeta,
    "https": proxyMeta,
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
}
# Target page to request
url = 'https://www.baidu.com/'
# A timeout keeps the script from blocking forever on a dead proxy or server.
resp = requests.get(url, headers=headers, proxies=proxies, timeout=10)
print(resp.status_code)
print(resp.text)

cookie = resp.cookies
# Empty cookie jar instance (not used further in this snippet).
cookie_jar = RequestsCookieJar()
# Convert the received cookie jar into a dict.
res_cookie = requests.utils.dict_from_cookiejar(cookie)
# Save to a local file; the context manager closes it even if json.dump raises.
with open("cookies_18.json", "w") as file:
    json.dump(res_cookie, file)