# -*- coding:utf-8 -*-
import re
import requests
import http.cookiejar as cookieJar
import time
import codecs
"""
1使用requests中的session会话加载Cookie,如果有Cookie 直接用,如果没有,不加载,先模拟登录,将登陆成功返回的cookie记录保存文件,一变下次使用
2 在登录过程中,需要从登陆页面返回的cookie中长出_xsrf,在传递参数的时候将其带上,并且带上用户名和密码,又可能会出现验证码。可以使用打码云工具破解,把验证码带上,就可以登录成功了,登录成功之后,利用
session。cookie.save()函数,。将cookie保存本地
3 在之后发起请求,只需要将之前保存的cookie带上,即可正常访问知乎的数据
"""
class ZHCookies(object):
    """Log in to Zhihu with a phone number and persist the session cookies.

    Cookies live in an LWP-format cookie jar backed by the file
    ``<phone_num>page.txt`` so later runs can reuse them without logging in.
    """

    # Browser-like headers sent with every request made by this class.
    _DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/64.0.3282.186 Safari/537.36",
        "Host": "www.zhihu.com",
        # Must be a well-formed URL; the previous value contained stray spaces.
        "Referer": "https://www.zhihu.com/",
    }
    _LOGIN_URL = "https://www.zhihu.com/login/phone_num"
    # Captures the value of the ``_xsrf`` cookie up to the next ';'.
    _XSRF_PATTERN = re.compile(r"_xsrf=(.*?);", re.S)

    def __init__(self, phone_num, password):
        """Create a session whose cookie jar is bound to this account's file.

        :param phone_num: phone number used as the login name (str); it is also
            the prefix of the cookie file name.
        :param password: account password (str).
        """
        self.url = "https://www.zhihu.com/signup?next=%2F"
        # Copy so per-instance tweaks never leak into the shared defaults.
        self.headers = dict(self._DEFAULT_HEADERS)
        self.filename = phone_num + "page.txt"
        self.password = password
        self.session = requests.Session()
        self.session.cookies = cookieJar.LWPCookieJar(filename=self.filename)
        self.phone_num = phone_num

    @staticmethod
    def _extract_xsrf(set_cookie):
        """Return the ``_xsrf`` value found in a Set-Cookie header, or None.

        :param set_cookie: raw ``Set-Cookie`` header text; ``None`` or an empty
            string is treated as "no cookies".
        """
        match = ZHCookies._XSRF_PATTERN.search(set_cookie or "")
        return match.group(1) if match else None

    def get_cookies(self, max_attempts=None):
        """Log in and save the resulting cookies to ``self.filename``.

        The sign-up page is fetched first to obtain the ``_xsrf`` token, which
        is then posted together with the phone number and password. Failed
        attempts are retried once per second.

        :param max_attempts: maximum number of login attempts; ``None`` (the
            default) retries until the login succeeds.
        :return: ``True`` once the login succeeded and the cookies were saved,
            ``False`` if ``max_attempts`` was exhausted.
        :raises RuntimeError: if the sign-up page did not hand out an
            ``_xsrf`` cookie.
        """
        response = self.session.get(self.url, headers=self.headers)
        print(response)

        # Find the _xsrf token; .get() avoids a KeyError when no cookie is set.
        xsrf = self._extract_xsrf(response.headers.get("Set-Cookie"))
        if xsrf is None:
            raise RuntimeError(
                "could not find the _xsrf token in the cookies returned by %s" % self.url
            )

        # The form is identical for every attempt, so build it once.
        data = {
            "_xsrf": xsrf,
            "phone_num": self.phone_num,
            "password": self.password,
        }

        attempts = 0
        while max_attempts is None or attempts < max_attempts:
            attempts += 1
            response = self.session.post(self._LOGIN_URL, data=data, headers=self.headers)
            res = response.json()
            if res.get("r") == 0:
                print("登录成功")
                self.session.cookies.save()
                return True
            # Show the server's explanation ("msg") before trying again.
            print(res.get("msg"))
            print("正在尝试登录。。")
            time.sleep(1)
        return False
if __name__ == "__main__":
    # Script entry point: make sure each account has saved cookies, then fetch
    # the Zhihu home page with one account's cookies and store the HTML.
    import os
    import random

    user_info = [{"phone_num": "********", "password": "*******"}]

    # Log in only for accounts that have no cookie file yet; otherwise the
    # load() below fails with FileNotFoundError on a first run.
    for user in user_info:
        zh = ZHCookies(user["phone_num"], user["password"])
        if not os.path.exists(zh.filename):
            zh.get_cookies()

    # Reuse the saved cookies of a randomly chosen account.
    user = random.choice(user_info)
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/64.0.3282.186 Safari/537.36",
        "Host": "www.zhihu.com",
        # Well-formed Referer; the previous value contained stray spaces.
        "Referer": "https://www.zhihu.com/",
    }
    with requests.Session() as session:
        # Load that account's cookie file from disk.
        session.cookies = cookieJar.LWPCookieJar()
        session.cookies.load(user["phone_num"] + "page.txt")
        # Send the request over HTTPS so the session cookies are never sent in
        # clear text and no http -> https redirect is needed.
        url = "https://www.zhihu.com"
        response = session.get(url, headers=request_headers)
        # Save the page.
        with codecs.open("zhihu1.html", "w", encoding="utf-8") as f:
            f.write(response.text)