调试了好长时间,把代码分享给大家:
成功登录一次之后,cookies 会保存到 cookies.txt,之后可以直接调用 get_index() 获取首页,无需再次登录。
获取验证码时一定要用 session,而不是直接用 requests,因为 session 中保留了服务器发给你的 cookies。
# coding=utf-8
import requests
import cookielib
from pyquery import PyQuery
import time
import re
import Image
# Headers sent with every request made by this script: Referer and Host are
# pinned to www.zhihu.com and the User-Agent is a desktop Chrome string.
header = {
    'Referer': 'https://www.zhihu.com/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36',
    'Host': 'www.zhihu.com',
}

# A single shared session whose cookie jar is backed by cookies.txt, so cookies
# written by an earlier run can be reused by this one.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies.txt')
try:
    session.cookies.load(ignore_discard=True)
except IOError:
    # IOError covers both a missing/unreadable cookies.txt and
    # cookielib.LoadError (a subclass of IOError) for a malformed file.
    # Unlike a bare ``except``, it lets KeyboardInterrupt/SystemExit through.
    print('cookie未能正确加载')
def get_xsrf():
    """Fetch the Zhihu home page and extract the ``_xsrf`` form token.

    Returns:
        The first value captured by the ``_xsrf`` pattern, or None when the
        page body is empty or the pattern does not match.
    """
    page = session.get('https://www.zhihu.com', headers=header).text
    if not page:
        return None
    tokens = re.findall('.*name="_xsrf" value="(.*?)"', page)
    return tokens[0] if tokens else None
def get_captcha():
    """Download a login captcha, try to display it, and return the user's answer.

    The image is saved to ``captcha.jpg`` in the current directory. Showing it
    with ``Image`` is best-effort: if that fails the user is told to open the
    file manually and is still prompted.

    Returns:
        The text typed by the user at the prompt.
    """
    # Millisecond timestamp used as the ``r`` query parameter.
    timestamp = str(int(time.time() * 1000))
    captcha_url = 'https://www.zhihu.com/captcha.gif?r={0}&type=login'.format(timestamp)
    # Must go through the shared session (not a bare requests.get) so the
    # captcha request uses the same cookies as the login POST.
    captcha_response = session.get(captcha_url, headers=header)
    with open("captcha.jpg", "wb") as f:
        f.write(captcha_response.content)
    try:
        im = Image.open("captcha.jpg")
        im.show()
        im.close()
    except Exception:
        # Best-effort display only. ``Exception`` (rather than a bare except)
        # keeps Ctrl-C working and still tolerates a missing viewer or an
        # Image object without close().
        print('无法自动显示验证码图片,请手动打开 captcha.jpg 查看')
    cap = raw_input("请输入验证码\n>")
    return cap
def _login_target(account):
    """Classify *account* and return ``(field_name, post_url, label)``, or None.

    A phone account is exactly 11 digits starting with 1; anything matching the
    e-mail pattern is an e-mail account. The patterns are mutually exclusive.
    """
    if re.match(r"^1\d{10}$", account):
        return 'phone_num', 'https://www.zhihu.com/login/phone_num', '手机验证码登录'
    if re.match(r"^[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+$", account):
        return 'email', 'https://www.zhihu.com/login/email', '邮箱验证码登录'
    return None


def login_zhihu(account, paasword):
    """Log in to Zhihu with a phone number or e-mail and save the cookies.

    Args:
        account: An 11-digit phone number starting with 1, or an e-mail address.
        paasword: The account password (parameter name kept for compatibility).

    Raises:
        ValueError: If *account* is neither a phone number nor an e-mail
            address. Previously this surfaced as an UnboundLocalError.
    """
    target = _login_target(account)
    if target is None:
        raise ValueError(
            'account must be an 11-digit phone number or an e-mail address: %r'
            % (account,))
    field_name, post_url, label = target
    print(label)
    # Ask for the captcha first, then fetch the token, matching the original
    # request order.
    captcha = get_captcha()
    post_data = {
        "_xsrf": get_xsrf(),
        field_name: account,
        'password': paasword,
        'captcha': captcha,
    }
    reponse_text = session.post(post_url, data=post_data, headers=header)
    print(reponse_text)
    # NOTE(review): cookies are loaded with ignore_discard=True but saved with
    # the default, so discard-flagged cookies are not persisted -- confirm
    # whether that asymmetry is intended.
    session.cookies.save()
def get_index():
    """Fetch the Zhihu home page via the shared session and save it to disk.

    The page text is written UTF-8 encoded to ``index_page.html`` in the
    current directory, then a confirmation line is printed.
    """
    html = session.get("https://www.zhihu.com", headers=header).text
    with open("index_page.html", "wb") as out:
        out.write(html.encode('utf-8'))
    print('ok 已经保存到 index_page 文件中')
def is_login():
    """Report whether the shared session appears to be logged in.

    Requests the inbox page without following redirects and returns True only
    when the status code is exactly 200.
    """
    # Presumably an anonymous request is redirected to the login page, which
    # is why redirects are not followed -- confirm against the live site.
    inbox_url = 'https://www.zhihu.com/inbox'
    status = session.get(inbox_url, headers=header, allow_redirects=False).status_code
    return status == 200
# First run: uncomment the login call below (with a real account/password) so
# that cookies.txt gets written; later runs can rely on the saved cookies.
# login_zhihu('xxx', 'xxx')
# To test the captcha download on its own:
# get_captcha()
get_index()
print(is_login())