cookie
适用场景:数据采集时,需要绕过登录页面,直接访问某个登录后才能看到的页面
个人信息页面的编码是 utf-8,但解码时仍然报了编码错误:因为请求中没有携带登录后的 cookie,实际上并没有进入个人信息页面,而是被跳转到了登录页面;登录页面不是 utf-8 编码,所以按 utf-8 解码会报错
爬取qq空间案例:
import urllib.request
# Fetch a QQ Zone page that sits behind a login wall by replaying the cookie of
# an already-authenticated browser session, then save the HTML to disk.
url = 'https://user.qzone.qq.com/2278301629'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    # The cookie carries the user's login state; sending a post-login cookie
    # lets the request go straight to pages that normally require logging in.
    # NOTE(review): this is a hard-coded session credential. It will expire,
    # and it should not be committed to source control -- consider loading it
    # from an environment variable or a local, untracked file.
    'Cookie': '_qpsvr_localtk=0.8093102956688834; pgv_pvid=471062464; pgv_info=ssid=s7645717872; ptui_loginuin=2278301629; uin=o2278301629; skey=@winyMvoRC; RK=TPWtHsOttl; ptcz=2b09c903cf0a42835fcd2002c26eeb98f37ba2af8b9954358c781485fa344471; p_uin=o2278301629; pt4_token=E6-lb0NoLII95TJsm7NmSjFJrlPw3vED0JluNnchAVk_; p_skey=GGyxYusrKcA0wk5Mz7ISod48NkAEZ-DRiglUHLvWO80_; Loading=Yes; qz_screen=1536x864; 2278301629_todaycount=0; 2278301629_totalcount=5519; QZ_FE_WEBP_SUPPORT=1; cpu_performance_v8=0',
    # Referer tells the server which page the request came from; it is
    # commonly checked to implement hotlink protection (e.g. for images).
    'Referer':'https://qzs.qq.com/'
}

request = urllib.request.Request(url=url, headers=headers)

# Use the response as a context manager so the connection is always closed,
# even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    # Decode with the charset the server declares in Content-Type and fall back
    # to UTF-8 when none is declared. If the cookie is missing or expired the
    # server may redirect to a login page that is not UTF-8, and a hard-coded
    # UTF-8 decode would raise UnicodeDecodeError.
    charset = response.headers.get_content_charset() or 'utf-8'
    content = response.read().decode(charset)

# Always write the saved copy as UTF-8, regardless of the source encoding.
with open('谷得耐特.html', 'w', encoding='utf-8') as fp:
    fp.write(content)