1. 直接登录后获取 Cookie
import requests

# Put the Cookie captured after a successful login directly into the request
# headers, then send a request to the page.
url = 'https://www.zhihu.com/'

# NOTE: the Cookie value below is a captured, account-specific session
# credential; it will presumably expire and must be replaced with your own
# when running this snippet.
headers = {
    "Host": "www.zhihu.com",
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
    "Cookie": 'd_c0="AGAku45jtw2PTu09Dpvhke4klei4JarodIE=|1528425319"; q_c1=a46ba33124a2403cb907a2d9105a7bd4|1528425319000|1528425319000; capsion_ticket="2|1:0|10:1528705722|14:capsion_ticket|44:NzE1YzNhZmJiZjIxNDA1MTg4ZTdkN2YyMTFiNWQwNTk=|42d9339a55b21206f1cae511940cab2468b6e201c1adce2c958be61edfadb1a0"; _zap=cc087957-a74b-43fc-a1d9-f7bd685897b7; _xsrf=8e9717b7-05a4-481c-8e82-3ca140d5b266; tgw_l7_route=156dfd931a77f9586c0da07030f2df36; z_c0="2|1:0|10:1528705732|4:z_c0|92:Mi4xRHBTMkJRQUFBQUFBWUNTN2ptTzNEU1lBQUFCZ0FsVk54SUFMWEFEMGtfeUowbzNNeXlQRjcwYXVSNV9zMHV1UXZn|27f3bcbcc0b8271658d88009ef05d80b3da8b6df2fb156a7c186a5d076047a63',
}

# allow_redirects=False disables automatic redirect following; without it the
# 302 status code of the first response cannot be observed.
# timeout keeps the script from hanging forever if the server never answers.
response = requests.get(url, headers=headers, allow_redirects=False, timeout=10)
print(response.status_code)  # 302
print(response.text)
2. 从登录界面通过抓包的方式获取访问了哪一些网址,以知乎为例
1>中文验证码
# 英文验证码的登录方式 # 中文登录(点击倒立文字) import requests,time,json # from requests.packages.urllib3.exceptions import InsecureRequestWarning # requests.packages.urllib3.disable_warnings(InsecureRequestWarning) headers = { "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0", "Referer": "https://www.zhihu.com/signup?next=%2F", "origin": "https://www.zhihu.com", "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20", } # cookies的自动化管理。 # 获取的服务器的Set-Cookie用session直接自动解析并保存,在后续的请求中,会在请求头中自动携带这些cookie # LWPCookieJar:对cookie进行自动操作,load() save() from http.cookiejar import LWPCookieJar session = requests.Session() session.cookies = LWPCookieJar(filename='zhihucookie.txt') try: session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True) except Exception as e: print('加载失败') res = session.get('https://www.zhihu.com/', headers=headers, verify=False) print(res) def zhihu_login(): global session cap = '' has_captcha = is_captcha() if has_captcha: # 获取验证码 cap = get_captcha() # 在提交登陆之前,还需要对输入的验证码的正确性进行单独验证 is_true = check_captcha(cap) if is_true ==