import time
from bs4 import BeautifulSoup
import requests
# Module metadata. Note that assigning ``__file__`` replaces the value the
# interpreter normally sets for this module.
__file__ = 'zhihudata.py'
__author__ = 'Jerry Liu'
__date__ = '2016-04-21'

# Browser-like request headers sent with the sign-in page GET and the
# login POST.
headers_base = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
    'Connection': 'keep-alive',
    'Host': 'www.zhihu.com',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/43.0.2357.130 Safari/537.36'
    ),
    'Referer': 'http://www.zhihu.com/',
}
class Zhihu:
    """Semi-automatic e-mail login for zhihu.com.

    The captcha image is downloaded to disk and the user types in the text
    read from it; the XSRF token lookup and the login POST go through one
    ``requests`` session held in ``self._session``.
    """

    def __init__(self):
        """Set the endpoint URLs, credentials and a fresh HTTP session."""
        self.domain = 'https://www.zhihu.com/'
        self.loginurl = 'http://www.zhihu.com/login/email'
        self.followees = 'https://www.zhihu.com/people/liuyu2783/followees'
        # NOTE(review): these look like placeholder credentials - replace
        # them before running.
        self.username = 'xxx@163.com'
        self.pwd = 'xxxxxx'
        self.captcha = ''
        self.xsrf = ''
        self._session = requests.session()

    @staticmethod
    def _captcha_url(timestamp_ms):
        """Return the login captcha image URL for a millisecond timestamp."""
        # The timestamp must be sent as the value of ``r`` ("?r=<ts>");
        # without the "=" the query string is malformed.
        return ('http://www.zhihu.com/captcha.gif?r=' + str(timestamp_ms)
                + '&type=login')

    def getCaptcha(self, path='d://爬虫/知乎壁验证码/captcha.gif'):
        """Download the login captcha image and save it to ``path``.

        :param path: destination file for the GIF; the directory must
            already exist. Defaults to the originally hard-coded location.
        :return: None
        """
        captcha_url = self._captcha_url(int(time.time() * 1000))
        captcha = self._session.get(captcha_url, stream=True)
        # ``with`` guarantees the file is closed even if the download fails
        # part-way through.
        with open(path, 'wb') as image_file:
            for chunk in captcha.iter_content(1024):
                image_file.write(chunk)

    def doSignin(self):
        """Log in interactively, retrying until the server reports success.

        Each attempt downloads a new captcha, prompts the user for its
        text, reads the ``_xsrf`` token from the sign-in page and posts the
        login form. The loop replaces the former self-recursion so repeated
        failures no longer grow the call stack.

        :return: None
        :raises IndexError: if the sign-in page has no ``_xsrf`` input.
        """
        while True:
            self.getCaptcha()
            self.captcha = input('请输入验证码, ')

            # Fetch the sign-in page to read the XSRF token.
            result = self._session.get(
                'https://www.zhihu.com/#signin',
                headers=headers_base,
            )
            hbody = BeautifulSoup(result.text, "html.parser")
            xsrf_inputs = hbody.find_all('input', attrs={"name": "_xsrf"})
            self.xsrf = xsrf_inputs[0].get('value')

            # Submit the login form.
            result = self._session.post(
                self.loginurl,
                data={
                    'email': self.username,
                    'password': self.pwd,
                    'captcha': self.captcha,
                    '_xsrf': self.xsrf,
                },
                headers=headers_base,
            )
            r = result.json()
            # The code treats ``r == 0`` in the JSON reply as success.
            if r['r'] == 0:
                print('登录成功')
                return
            print('登录失败,请重试......', r['msg'])
def main():
    """Create a Zhihu client and run the interactive sign-in."""
    Zhihu().doSignin()


# Run the sign-in only when executed as a script, not on import.
if __name__ == '__main__':
    main()
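# Source article title: "Python 半自动登录知乎-验证码需要识别"
# (Semi-automatic Zhihu login in Python; the captcha has to be read by the user.)
# Site note: "最新推荐文章于 2022-07-31 20:40:30 发布"
# (latest recommended article published 2022-07-31 20:40:30).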