爬虫-python -(12) 验证码 -selenium

最新推荐文章于 2024-05-25 19:37:12 发布

朗风风

最新推荐文章于 2024-05-25 19:37:12 发布

阅读量652

点赞数

分类专栏： python(爬虫)-学习笔记文章标签：爬虫 python selenium

本文链接：https://blog.csdn.net/luxppp880/article/details/122491757

版权

python(爬虫)-学习笔记专栏收录该内容

12 篇文章 7 订阅

订阅专栏

文章目录

1.验证码转化

将图片验证码转化为字符串有两种方式：其一是自己通过图像处理识别出文字，再转化为字符串；其二是交给第三方去识别，这样就需要把图片上传到对方的网站，识别后由对方把字符串返回给自己。
现在学习第二种方法，这样就需要一个网站，这里用的是超级鹰。
需要在超级鹰网站注册，并绑定微信免费领取1000积分。（因为让它识别验证码需要消耗积分）
以上步骤完成后，需要下载超级鹰的python模板，也就是调用超级鹰并返回验证码识别结果的程序。该模板在超级鹰网站上提供，直接下载即可。需要简单阅读一下这个程序，把需要输入的参数搞明白。
1-3分别对应超级鹰的账号、密码以及ID，ID要在账户内生成。
4-5分别为需要识别的验证码图片数据、验证码类型。
在这里插入图片描述
2.实现自动登录超级鹰网站
登录界面与网站

从截取验证码图片到超级鹰返回识别结果，中间的调用代码不必从模板文件里复制过来，可以直接用 from chaojiying import Chaojiying_Client 的方式导入类。

'''
1.将网站的验证码下载
2.将验证码通过超级鹰找出然后填入到对应位置登录
'''
import time
import requests
from hashlib import md5
from selenium import webdriver
from chaojiying import Chaojiying_Client #从chaojiying.py导入类

def open_url(url,show):
    """Launch Chrome, load ``url`` and return the driver.

    Args:
        url: Address of the page to open.
        show: If truthy, a visible browser window is used; if falsy, Chrome
            is started with the headless flags below.

    Returns:
        The ``webdriver.Chrome`` instance after ``get(url)`` has been called.
    """
    option = webdriver.ChromeOptions()
    # Keep useless log output from being printed.
    option.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
    if not show:
        option.add_argument('--headless')  # headless browser: no window, runs in the background
        # Fixed spelling: this was '--disbale--gpu', which is not a Chrome switch.
        option.add_argument('--disable-gpu')
    web = webdriver.Chrome(options=option)
    web.get(url)
    return web
def download_code(web):
    """Click the captcha image, then return a screenshot of it as PNG data.

    Args:
        web: Selenium WebDriver currently showing the login page.

    Returns:
        The value of the image element's ``screenshot_as_png`` attribute.
    """
    # Imported here so the function does not depend on the file's import header.
    from selenium.webdriver.common.by import By

    captcha_xpath = '/html/body/div[3]/div/div[3]/div[1]/form/div/img'
    # find_element_by_xpath was removed in Selenium 4.3; find_element(By.XPATH, ...)
    # is available on older releases as well.
    # The click presumably asks the site for a fresh captcha - verify on the page.
    web.find_element(By.XPATH, captcha_xpath).click()
    time.sleep(0.1)
    # Look the element up again after the click instead of reusing the clicked one.
    img = web.find_element(By.XPATH, captcha_xpath)
    data = img.screenshot_as_png
    return data
def web_login(web,user,serect,code):
    """Fill in the login form, submit it and switch to the newest window.

    Args:
        web: Selenium WebDriver showing the login page.
        user: Text typed into the input named ``user``.
        serect: Text typed into the input named ``pass``.
        code: Captcha text typed into the input named ``imgtxt``.

    Returns:
        The same driver, switched to the last entry of ``window_handles``.
    """
    # Imported here so the function does not depend on the file's import header.
    from selenium.webdriver.common.by import By

    # find_element_by_css_selector was removed in Selenium 4.3;
    # find_element(By.CSS_SELECTOR, ...) is available on older releases as well.
    web.find_element(By.CSS_SELECTOR, '[name = "user"]').send_keys(user)
    web.find_element(By.CSS_SELECTOR, '[name = "pass"]').send_keys(serect)
    web.find_element(By.CSS_SELECTOR, '[name = "imgtxt"]').send_keys(code)
    time.sleep(0.5)
    web.find_element(By.CSS_SELECTOR, '[value = "登录"]').click()
    time.sleep(2)  # fixed pause after submitting before touching the windows
    web.switch_to.window(web.window_handles[-1])  # switch to the newest window
    return web
def url_get_data(web):
    """Return the text of one fixed ``<span>`` on the current page.

    Args:
        web: Selenium WebDriver showing the page reached after login.

    Returns:
        The ``text`` of the element matched by the absolute XPath below
        (the caller prints it as the account's point balance).
    """
    # Imported here so the function does not depend on the file's import header.
    from selenium.webdriver.common.by import By

    # find_element_by_xpath was removed in Selenium 4.3.
    res = web.find_element(By.XPATH, '/html/body/div[3]/div[2]/div[1]/div[1]/span').text
    return res
if __name__ == '__main__':
    # Script flow: open the login page, screenshot its captcha, have the
    # recognition service read it, log in, then print the account points.
    url= 'http://www.chaojiying.com/user/login/'
    chaojiying = Chaojiying_Client('账号', '密码', 'ID')
    # 1. Open the site
    web = open_url(url,True)   # pass False to run without showing a window
    try:
        time.sleep(3)
        # 2. Screenshot the captcha on the login page
        im = download_code(web)  # no need to save it locally; the image bytes are passed on directly
        # 3. Send the captcha image to the service and read back the recognised text
        #    (second argument is the captcha type code expected by the service)
        code_res = chaojiying.PostPic(im, 1902)['pic_str']
        # 4. Enter account, password and captcha, then log in
        web = web_login(web,'账号', '密码', code_res)
        # 5. Read the account points from the page shown after login
        res = url_get_data(web)
        print('账户积分：'+res)
        time.sleep(1)
    finally:
        # quit() ends the whole driver session; close() only closed the current
        # window and was skipped entirely when an earlier step raised.
        web.quit()

2. 52破解的注册 - 谷歌验证码

注册页面
由于12306现在没有图片点击验证码了，所以自己找了一个类似的，但是比那个要难，因为会出现2次或多次，现在只考虑产生一次的情况
要切到两个iframe里面，所以要先切进去，再切出来，再切到另外一个里面。

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from chaojiying import Chaojiying_Client #从chaojiying.py导入类
from selenium.webdriver.common.action_chains import ActionChains

def open_url(url):
    """Start Chrome with its automation hints switched off and load ``url``.

    Args:
        url: Address of the page to open.

    Returns:
        The ``webdriver.Chrome`` instance after ``get(url)`` has been called.
    """
    chrome_opts = Options()
    # Keep useless log output from being printed.
    excluded_switches = ['enable-automation', 'enable-logging']
    chrome_opts.add_experimental_option("excludeSwitches", excluded_switches)
    # Make the target site believe this is not chromedriver.
    chrome_opts.add_argument('--disable-blink-features=AutomationControlled')
    browser = webdriver.Chrome(options=chrome_opts)
    browser.get(url)
    return browser


if __name__ == '__main__':
    # Script flow: open the login page, tick the reCAPTCHA checkbox inside its
    # iframe, screenshot the image challenge from a second iframe, let the
    # recognition service return click coordinates, click them and submit.
    # Only the case where a single challenge is shown is handled.
    from selenium.webdriver.common.by import By

    web = open_url('https://www.52pojie.cn/member.php?mod=logging&action=login')
    try:
        time.sleep(3)
        chaojiying = Chaojiying_Client('超级鹰账号', '超级鹰密码', 'ID')
        # find_element(s)_by_* were removed in Selenium 4.3; find_element(By..., ...)
        # is available on older releases as well.
        iframe = web.find_element(By.XPATH, '//*[@id="seccode_cS"]/div/table/tbody/tr/td/div/div/div/iframe')
        web.switch_to.frame(iframe)  # enter the checkbox iframe
        web.find_element(By.XPATH, '//*[@id="rc-anchor-container"]/div[4]').click()
        time.sleep(3)
        web.switch_to.default_content()  # back to the main page
        res = web.find_element(By.CSS_SELECTOR, '[style ="z-index: 2000000000; position: relative; width: 400px; height: 580px;"]')
        iframes = res.find_elements(By.CSS_SELECTOR, '[title= "reCAPTCHA 验证将于 2 分钟后过期"]')
        iframe = iframes[-1]
        web.switch_to.frame(iframe)  # enter the image-challenge iframe
        code_pic = web.find_element(By.XPATH, '//*[@id="rc-imageselect"]/div[2]')
        code_res = chaojiying.PostPic(code_pic.screenshot_as_png, 9004)['pic_str']
        print(code_res)
        time.sleep(2)
        # The answer is parsed as 'x,y' pairs separated by '|'.
        for i in code_res.split('|'):
            [x,y]= i.split(',')
            print([int(x),int(y)])
            # NOTE(review): newer Selenium releases measure this offset from the
            # element's centre rather than its top-left corner - confirm against
            # the installed version and the origin used by the returned coordinates.
            ActionChains(web).move_to_element_with_offset(code_pic,int(x),int(y)).click().perform()
            time.sleep(1)
        web.find_element(By.XPATH, '//*[@id="recaptcha-verify-button"]').click()
        time.sleep(1000)  # long pause that keeps the browser open after submitting
    finally:
        # The original never released the driver; end the session explicitly.
        web.quit()

3.12306登录

12306现在没有了图片点击验证倒是让我感觉很意外。

from tkinter import Button
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
def open_url(url):
    """Create a Chrome driver that hides its automation hints and open ``url``.

    Args:
        url: Address of the page to open.

    Returns:
        The ``webdriver.Chrome`` instance after ``get(url)`` has been called.
    """
    opts = Options()
    # Keep useless log output from being printed.
    opts.add_experimental_option(
        "excludeSwitches",
        ['enable-automation', 'enable-logging'],
    )
    # Make 12306 believe this is not chromedriver.
    opts.add_argument('--disable-blink-features=AutomationControlled')
    driver = webdriver.Chrome(options=opts)
    driver.get(url)
    return driver

def web_login(web,user,serect,code=''):
    """Log in, drag the slider check and confirm the dialog that follows.

    Args:
        web: Selenium WebDriver showing the login page.
        user: Text typed into the element with id ``J-userName``.
        serect: Text typed into the element with id ``J-password``.
        code: Unused; kept so the signature matches the captcha-based variant.

    Returns:
        The same driver, switched to the last entry of ``window_handles``.
    """
    # Imported here so the function does not depend on the file's import header.
    from selenium.webdriver.common.by import By

    # find_element_by_* were removed in Selenium 4.3; find_element(By..., ...)
    # is available on older releases as well.
    web.find_element(By.CSS_SELECTOR, '[id = "J-userName"]').send_keys(user)
    web.find_element(By.CSS_SELECTOR, '[id = "J-password"]').send_keys(serect)
    time.sleep(0.5)
    web.find_element(By.CSS_SELECTOR, '[id = "J-login"]').click()
    time.sleep(2)
    # Slider verification: drag the handle 350 px to the right.
    button = web.find_element(By.XPATH, '//*[@id="nc_1__scale_text"]/span')
    webdriver.ActionChains(web).drag_and_drop_by_offset(button,350,0).perform()
    web.switch_to.window(web.window_handles[-1])  # switch to the newest window
    time.sleep(3)
    # Confirm the epidemic-prevention notice.
    web.find_element(By.CSS_SELECTOR, '[class = "btn btn-primary ok"]').click()

    return web

def get_web_data(web):
    """Return the text of the first two blocks under ``#js-minHeight``.

    Args:
        web: Selenium WebDriver showing the page reached after login.

    Returns:
        A ``(res, res2)`` tuple holding the ``text`` of ``div[1]/div[1]`` and
        ``div[1]/div[2]`` below the element with id ``js-minHeight``.
    """
    # Imported here so the function does not depend on the file's import header.
    from selenium.webdriver.common.by import By

    # find_element_by_xpath was removed in Selenium 4.3.
    res = web.find_element(By.XPATH, '//*[@id="js-minHeight"]/div[1]/div[1]').text
    res2 = web.find_element(By.XPATH, '//*[@id="js-minHeight"]/div[1]/div[2]').text
    return res,res2
    

if __name__=='__main__':
    # Script flow: open the 12306 login page, log in (including the slider
    # check), then print two text blocks from the page shown afterwards.
    url = 'https://kyfw.12306.cn/otn/resources/login.html'
    web = open_url(url)
    try:
        time.sleep(2)
        web = web_login(web,'12306账号','12306密码')
        time.sleep(2)
        res,res2 = get_web_data(web)
        print(res)
        print(res2)
        time.sleep(100)  # keep the browser open for a while before shutting down
    finally:
        # quit() ends the whole driver session; close() only closed the current
        # window and was skipped entirely when an earlier step raised.
        web.quit()

4.总结

selenium已经学完，我要休息几天，再决定接下来学什么……

朗风风

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
爬虫-python -(12) 验证码 -selenium

1.验证码转化将图片验证码转化为字符串，有两种方式，其一为自己同图像处理识别文字，然后将字符串转化出，其二为让别人去转化，这样就需要上传至别人的网站，识别后将字符串返回给自己。现在学习第二种方法，这样就需要一个网站，这里用的是超级鹰。需要再超级鹰网站注册以及绑定微信免费领取1000积分。（因为让它给你识别验证码需要消费积分）以上过程都走完了，现在需要下载超级鹰的python模板，就是调用超级鹰返回验证码的程序。这个网站上有，直接下载即可。需要简单读取下这个程序，将需要的输入的东西搞明白。1-3分别
复制链接

扫一扫

专栏目录