# Steps:
# 1. Open the page
# 2. Click the account/password login tab
# 3. Check whether a captcha has appeared
# 4. Enter the account and password
# 5. Download the captcha image
# 6. Use zheye to analyze the image and get the inverted characters' coordinates
# 7. The page renders the stored image at half scale, so adjust the coordinates
# 8. Click each inverted character via mouse move/click
# 9. Log in
# -*- coding: utf-8 -*-
import os
import sys
from selenium import webdriver
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import scrapy
from mouse import move,click
import time
import pickle
from selenium.webdriver.common.keys import Keys
'''Zhihu login: solve the inverted-character captcha.'''
class ZhihuSpider(scrapy.Spider):
    """Log in to Zhihu by solving the inverted-Chinese-character captcha.

    Flow: attach to an already-running Chrome (remote debugging port 9222),
    fill in the account/password form, download the captcha image, let
    ``zheye`` locate the inverted characters, click them with the OS-level
    mouse (``mouse.move``/``mouse.click``), then hand the session cookies
    over to scrapy as the first request.
    """
    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['https://www.zhihu.com/']

    def parse(self, response):
        pass

    @staticmethod
    def _find_or_none(browser, class_name):
        """Return the first element with ``class_name``, or None if absent."""
        try:
            return browser.find_element_by_class_name(class_name)
        except Exception:
            return None

    def start_requests(self):
        import base64
        from selenium.webdriver.chrome.options import Options
        # Directory where the downloaded captcha image is stored.
        res_file = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'res')
        chrome_option = Options()
        # FIX: flag was misspelled "--disable--extensions" (double dash) and
        # was therefore silently ignored by Chrome.
        chrome_option.add_argument("--disable-extensions")
        # Attach to a Chrome started manually with --remote-debugging-port=9222.
        chrome_option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
        # Raw string so the Windows path backslashes are never escape-interpreted.
        browser = webdriver.Chrome(executable_path=r'D:\chromedriver_win32\chromedriver.exe',
                                   chrome_options=chrome_option)
        try:
            browser.maximize_window()
        except Exception:
            # Best effort: maximizing can fail when attached via debuggerAddress.
            pass
        browser.get('https://www.zhihu.com/signin')
        # Switch to the "account + password" login tab.
        browser.find_element_by_xpath('//div[@class="SignFlow-tabs"]/div[2]').click()
        # Select-all before typing so any prefilled text is replaced.
        browser.find_element_by_xpath(
            '//div[@class="SignFlowInput SignFlow-accountInputContainer"]/label/input').send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(
            '//div[@class="SignFlowInput SignFlow-accountInputContainer"]/label/input').send_keys('17792926712')
        browser.find_element_by_xpath('//div[@class="SignFlow-password"]//input').send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath('//div[@class="SignFlow-password"]//input').send_keys('123456')
        # browser.find_element_by_xpath('//div[@class="SignContainer-inner"]/form/button').click()
        # The captcha sometimes pops up before submitting; check whether either
        # the English or the Chinese captcha image is already visible.
        english_captcha_element = self._find_or_none(browser, 'Captcha-englishImg')
        chinese_captcha_element = self._find_or_none(browser, 'Captcha-chineseImg')
        show = False
        if chinese_captcha_element:
            # src is "data:image/jpg;base64,null" until a captcha is generated.
            text = chinese_captcha_element.get_attribute("src").replace('data:image/jpg;base64', '')
            if 'null' not in text:
                show = True
        if english_captcha_element:
            text = english_captcha_element.get_attribute("src").replace('data:image/jpg;base64', '')
            if 'null' not in text:
                show = True
        if not show:
            # No captcha yet: click the submit button by screen coordinates to
            # force the captcha to appear.
            move(756, 505)
            click()
        login_success = False
        # FIX: original read "while login_success:" with login_success False,
        # so the captcha-solving loop below could never execute.
        while not login_success:
            try:
                # FIX: the original xpath ended in "/text()", which selects a
                # text node and always raises in find_element_by_xpath, so the
                # login check could never succeed. Presence of this header link
                # is taken as "logged in".
                browser.find_element_by_xpath('//div[@class="AppHeader-inner"]/ul/li[2]/a')
                login_success = True
                continue
            except Exception:
                pass
            time.sleep(10)
            english_captcha_element = self._find_or_none(browser, 'Captcha-englishImg')
            chinese_captcha_element = self._find_or_none(browser, 'Captcha-chineseImg')
            if chinese_captcha_element:
                ele_position = chinese_captcha_element.location
                x_relative = ele_position["x"]
                y_relative = ele_position["y"]
                # Browser chrome height (tabs/toolbar) = outer minus inner
                # height; needed to convert page coords to screen coords.
                browser_navigation_panel_height = browser.execute_script(
                    'return window.outerHeight-window.innerHeight;')
                base64_text = chinese_captcha_element.get_attribute("src")
                # Strip the data-URI prefix and URL-encoded newlines.
                code = base64_text.replace('data:image/jpg;base64', '').replace('%0A', '')
                file_path = os.path.join(res_file, 'yzm_c.jpeg')
                if os.path.exists(file_path):
                    os.remove(file_path)
                # FIX: context manager so the file handle is always closed.
                with open(file_path, 'wb') as fh:
                    fh.write(base64.b64decode(code))
                # zheye locates the inverted Chinese characters in the image.
                from zheye import zheye
                z = zheye()
                time.sleep(1)
                pt = z.Recognize(file_path)
                # zheye returns (row, col) on the full-size image; the page
                # renders it at half size, so halve and swap to (x, y).
                position_news = []
                if len(pt) == 2:
                    position_news.append([pt[0][1] / 2, pt[0][0] / 2])
                    position_news.append([pt[1][1] / 2, pt[1][0] / 2])
                else:
                    position_news.append([pt[0][1] / 2, pt[0][0] / 2])
                # Click each inverted character at its on-screen position.
                for position_new in position_news:
                    print('x:{}'.format(x_relative + position_new[0]))
                    print('y_relative:{}'.format(y_relative))
                    print('browser_navigation_panel_height:{}'.format(browser_navigation_panel_height))
                    print('position_new[1]:{}'.format(position_new[1]))
                    move(x_relative + position_new[0],
                         y_relative + browser_navigation_panel_height + position_new[1])
                    click()
                    time.sleep(5)
                browser.find_element_by_xpath('//div[@class="SignContainer-inner"]/form/button').click()
                # Fallback: click the login button by screen coordinates.
                move(756, 542)
                click()
                cookies = browser.get_cookies()
                cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}
                return [scrapy.Request(url=self.start_urls[0], dont_filter=True, cookies=cookie_dict)]
            else:
                # TODO: English captcha is not handled yet.
                pass
        # NOTE(review): if the loop exits because login succeeded via the
        # header check, nothing is returned (original behavior preserved).
        # Alternative: log in by loading previously pickled cookies:
        # cookie_file = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
        #                            'cookies', 'zhihu.cookie')
        # cookies = pickle.load(open(cookie_file, 'rb'))
        # cookie_dict = {}
        # for cookie in cookies:
        #     cookie_dict[cookie['name']] = cookie['value']
        # return [scrapy.Request(url=self.start_urls[0], dont_filter=True, cookies=cookie_dict)]
# NOTE(review): reference snippet kept as an unused string literal (never
# executed): capture cookies once from a hand-driven browser session, pickle
# them, and replay them via scrapy.Request. String content preserved verbatim.
'''
获取cookie,并保存
from selenium.webdriver.chrome.options import Options
chrome_option = Options()
chrome_option.add_argument("--disable--extensions")
chrome_option.add_experimental_option("debuggerAddress","127.0.0.1:9222")
browser = webdriver.Chrome(executable_path='D:\chromedriver_win32\chromedriver.exe',chrome_options=chrome_option)
browser.get('https://www.zhihu.com/signin')
# browser.find_element_by_xpath('//div[@class="SignFlow-tabs"]/div[2]').click()
# browser.find_element_by_xpath(
# '//div[@class="SignFlowInput SignFlow-accountInputContainer"]/label/input').send_keys(Keys.CONTROL+"a")
# browser.find_element_by_xpath('//div[@class="SignFlowInput SignFlow-accountInputContainer"]/label/input').send_keys('17792926722')
# browser.find_element_by_xpath('//div[@class="SignFlow-password"]//input').send_keys(Keys.CONTROL+"a")
# browser.find_element_by_xpath('//div[@class="SignFlow-password"]//input').send_keys('123456')
# # 鼠标点击
# move(756,542)
# click()
# # 选择控件点击
# browser.find_element_by_xpath('//div[@class="SignContainer-inner"]/form/button').click()
# 获取cookie
cookies = browser.get_cookies()
pickle.dump(cookies,open(cookie_file,'wb'))
cookie_dict = {}
for cookie in cookies:
cookie_dict[cookie['name']] = cookie['value']
return [scrapy.Request(url=self.start_urls[0],dont_filter=True,cookies=cookie_dict)]
'''