# -*- coding: utf-8 -*-
"""Log in to zhihu.com with a phone number or e-mail and a click captcha.

Importing this module has side effects: it creates a shared requests
session, tries to load saved cookies from ``cookies.txt``, downloads the
login captcha, and shows it in an OpenCV window so that the user can click
the required positions (collected in ``push``).
"""
import requests
import json

try:
    import cookielib  # Python 2
except ImportError:
    import http.cookiejar as cookielib  # Python 3
import re
from bs4 import BeautifulSoup
import os
import time
import cv2

try:
    from PIL import Image
except ImportError:
    # Kept as best-effort, but the captcha conversion below uses Image and
    # will fail with NameError when Pillow is not installed.
    pass

# Shared HTTP session whose cookies are persisted in LWP format.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except (IOError, OSError):
    # Missing or unreadable cookie file (LoadError derives from IOError /
    # OSError); continue with an empty jar.
    print("cookie未能加载")

agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
header = {
    "HOST": "www.zhihu.com",
    # NOTE(review): "zhizhu" looks like a typo for "zhihu"; left unchanged,
    # confirm before editing.
    "Referer": "https://www.zhizhu.com",
    'User-Agent': agent,
}

# Click positions recorded by the `left` mouse callback; sent as the
# captcha's "input_points" by zhihu_login().
push = []

# Locations of the downloaded captcha and its JPEG conversion.
_CAPTCHA_GIF = r'G:\linuxShare\ArticleSpider\ArticleSpider\utils\captcha.gif'
_CAPTCHA_JPG = r'G:\linuxShare\ArticleSpider\ArticleSpider\utils\captcha.jpg'


def is_login():
    """Return True when the probe page answers with HTTP 200.

    The request is sent without following redirects, so any other status
    code (presumably a redirect to the sign-in page -- TODO confirm) is
    reported as "not logged in".
    """
    inbox_url = "https://www.zhihu.com/question/56250357/answer/148534773"
    response = session.get(inbox_url, headers=header, allow_redirects=False)
    return response.status_code == 200


def get_xsrf():
    """Fetch the sign-in page and return the value of its ``_xsrf`` input.

    Raises:
        TypeError: if the page contains no ``<input name="_xsrf">`` element
            (``find`` returns None, which cannot be subscripted).
    """
    homeurl = 'https://www.zhihu.com/#signin'
    homeresponse = session.get(url=homeurl, headers=header)
    homesoup = BeautifulSoup(homeresponse.text, 'html.parser')
    xsrfinput = homesoup.find('input', {'name': '_xsrf'})
    return xsrfinput['value']


def left(event, x, y, flags, param):
    """OpenCV mouse callback: record each left-button click in ``push``.

    Coordinates are halved before being stored -- presumably because the
    displayed image is twice the 200x44 size reported to the server
    (TODO confirm).
    """
    if event == cv2.EVENT_LBUTTONDOWN:
        push.append([x * 0.5, y * 0.5])


# --- Captcha download and manual solving (runs at import time) ------------
randomtime = str(int(time.time() * 1000))  # millisecond timestamp for "r"
captchaurl = ('https://www.zhihu.com/captcha.gif?r='
              + randomtime + "&type=login&lang=cn")
captcharesponse = session.get(url=captchaurl, headers=header)
with open(_CAPTCHA_GIF, 'wb') as f:
    f.write(captcharesponse.content)

# Convert the GIF to an RGB JPEG and load that file with OpenCV.
im = Image.open(_CAPTCHA_GIF)
im = im.convert('RGB')
im.save(_CAPTCHA_JPG, 'jpeg')
img = cv2.imread(_CAPTCHA_JPG)
print(type(img))

# Show the captcha and block until a key is pressed; clicks made meanwhile
# are appended to `push` by the `left` callback.
cv2.namedWindow('Image')
cv2.setMouseCallback('Image', left)
cv2.imshow('Image', img)
cv2.waitKey(0)


def get_index():
    """Download the sign-in page and save it as ``index_page.html`` (UTF-8)."""
    response = session.get("https://www.zhihu.com/#signin", headers=header)
    with open("index_page.html", "wb") as f:
        f.write(response.text.encode("utf-8"))
    print("ok")


def zhihu_login(account, password):
    """Post login credentials to zhihu and save the session cookies.

    Args:
        account: a phone number (starts with ``1`` followed by at least ten
            digits) or an e-mail address (contains ``@``).
        password: the account password.

    Raises:
        ValueError: if ``account`` is neither a phone number nor an e-mail.
    """
    if re.match(r"^1\d{10}", account):
        print("手机号码登录")
        post_url = "https://www.zhihu.com/login/phone_num"
        account_field = "phone_num"
    elif "@" in account:
        print("邮箱方式登录")
        post_url = "https://www.zhihu.com/login/email"
        account_field = "email"
    else:
        # Previously this fell through and failed later with
        # UnboundLocalError on post_data / post_url.
        raise ValueError(
            "account must be a phone number or an e-mail address: %r"
            % (account,)
        )

    post_data = {
        "_xsrf": get_xsrf(),
        account_field: account,
        "password": password,
        "captcha_type": "cn",
        "captcha": {"img_size": [200, 44], "input_points": push},
    }
    print(post_data)
    print(push)
    session.post(post_url, data=json.dumps(post_data), headers=header)
    session.cookies.save()