使用 Selenium 自动通过极验(Geetest)滑动验证码

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver .common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
from io import BytesIO
from lxml import etree
import time
import pymssql


class TianYanLogin(object):
    def __init__(self, url='https://www.tianyancha.com/search?key=阿里巴巴', db_config=None):
        """Start a Chrome session and open the SQL Server connection.

        :param url: page opened by ``login``; defaults to the original
            hard-coded search URL.
        :param db_config: optional dict of keyword arguments forwarded to
            ``pymssql.connect``. When omitted, the original built-in
            connection settings are used.
        """
        if db_config is None:
            # NOTE(review): credentials are embedded in source. Pass
            # ``db_config`` (e.g. loaded from the environment or a secrets
            # store) instead of relying on this fallback.
            db_config = {
                'host': '39.97.106.44',
                'user': 'yishan1',
                'password': 'yishan2018.',
                'database': 'CFCMangeSoft',
            }
        self.url = url
        self.browser = webdriver.Chrome()
        self.conn = pymssql.connect(**db_config)
        self.cur = self.conn.cursor()
        # Explicit-wait helper with a 20 second timeout.
        self.wait = WebDriverWait(self.browser, 20)

    # def __del__(self):
    #     self.browser.close()

    def login(self, phone='154366', password='2355wazh'):
        """Open ``self.url`` and fill in the login form.

        Uses ``find_element(By.XPATH, ...)`` because the older
        ``find_element_by_xpath`` helpers were removed in Selenium 4.3.

        :param phone: text typed into the first input of the form.
        :param password: text typed into the second input of the form.
        """
        self.browser.get(self.url)
        self.browser.maximize_window()
        time.sleep(1)
        # First "title" tab of the login module (presumably the
        # account/password tab - confirm against the live page).
        account_tab = self.browser.find_elements(
            By.XPATH,
            "//div[@class='module module1 module2 loginmodule collapse in']"
            "/div[@class='title-tab text-center']/div[@class='title']")[0]
        account_tab.click()
        time.sleep(1)
        phone_input = self.browser.find_element(
            By.XPATH,
            "//*[@id='web-content']/div/div[2]/div/div[2]/div/div[3]/div[2]/div[2]/input")
        phone_input.click()
        time.sleep(1)
        phone_input.send_keys(phone)
        password_input = self.browser.find_element(
            By.XPATH,
            "//*[@id='web-content']/div/div[2]/div/div[2]/div/div[3]/div[2]/div[3]/input")
        password_input.click()
        time.sleep(1)
        password_input.send_keys(password)

    def button(self):
        """Wait three seconds, then click the login form's submit element.

        Nothing is returned; the element is only clicked. Uses
        ``find_element(By.XPATH, ...)`` because ``find_element_by_xpath``
        was removed in Selenium 4.3.
        """
        time.sleep(3)
        login_button = self.browser.find_element(
            By.XPATH,
            "//*[@id='web-content']/div/div[2]/div/div[2]/div/div[3]/div[2]/div[5]")
        login_button.click()

    def get_position(self):
        """Return the captcha's bounding box inside a full-page screenshot.

        The coordinates are fixed pixel values rather than being read from
        the page element (presumably tuned for one particular window size -
        confirm before running on a different display).

        :return: tuple ``(top, bottom, left, right)`` in pixels.
        """
        box = {'top': 340, 'bottom': 510, 'left': 750, 'right': 1140}
        return (box['top'], box['bottom'], box['left'], box['right'])

    def get_spider(self):
        """Wait three seconds, then locate and return the puzzle-piece element.

        Uses ``find_element(By.XPATH, ...)`` because ``find_element_by_xpath``
        was removed in Selenium 4.3.

        :return: the WebElement matched by the positional XPath below.
        """
        time.sleep(3)
        return self.browser.find_element(
            By.XPATH,
            "//div[10]/div[2]/div[2]/div[1]/div[2]/div[1]/a[1]/div[2]")

    def get_geetest_image(self, name='captcha.png'):
        """
        获取验证码图片
        :return: 图片对象
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_screenshot(self):
        """Wait three seconds, then take a screenshot of the browser page.

        :return: the screenshot decoded from PNG bytes into an image object.
        """
        time.sleep(3)
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_slide(self):
        """Wait three seconds, then locate and return the slider handle.

        Uses ``find_element(By.XPATH, ...)`` because ``find_element_by_xpath``
        was removed in Selenium 4.3.

        :return: the WebElement matched by the positional XPath below.
        """
        time.sleep(3)
        return self.browser.find_element(
            By.XPATH,
            "//div[10]/div[2]/div[2]/div[2]/div[2]")

    def is_pixel_equal(self, image1, image2, x, y):
        """Tell whether two images have (nearly) the same colour at one pixel.

        The first three channels are compared; the pixels count as equal
        when every channel differs by less than 60.

        :param image1: first image.
        :param image2: second image.
        :param x: pixel column.
        :param y: pixel row.
        :return: True when the pixels match within the tolerance, else False.
        """
        tolerance = 60
        first = image1.load()[x, y]
        second = image2.load()[x, y]
        return all(abs(first[channel] - second[channel]) < tolerance
                   for channel in range(3))

    def get_gap(self, image1, image2):
        """
        获取缺口偏移量
        :param image1: 带缺口图片
        :param image2: 不带缺口图片
        :return:
        """
        left = 80
        # 判断有缺口和没有缺口的图片像素是否一致
        # image1.size[0]是x轴   [1]是y轴
        print(image1.size[0])
        print(image1.size[1])
        for i in range(left,image1.size[0]):
            for j in range(image1.size[1]):
                # print(self.is_pixel_equal(image1, image2, i, j))
                if not self.is_pixel_equal(image1, image2, i, j):
                    left = i

        if left == 80:
            for i in range(90, image1.size[0]):
                for j in range(image1.size[1]):
                    # print(self.is_pixel_equal(image1, image2, i, j))
                    if not self.is_pixel_equal(image1, image2, i, j):
                        left = i
        return left

    def get_track(self, distance):
        """Build a list of per-step horizontal moves covering ``distance``.

        The motion accelerates (a = 2) until four fifths of the distance has
        been covered, then decelerates (a = -3). Each step spans 0.2 time
        units and its displacement is rounded to an integer before being
        stored.

        :param distance: total offset to travel.
        :return: list of integer moves, one per step (empty when
            ``distance`` is not positive).
        """
        step = 0.2
        brake_point = distance * 4 / 5
        moves = []
        travelled = 0
        speed = 0
        while travelled < distance:
            # Speed up before the brake point, slow down after it.
            accel = 2 if travelled < brake_point else -3
            start_speed = speed
            # v = v0 + a * t
            speed = start_speed + accel * step
            # x = v0 * t + 1/2 * a * t^2
            delta = start_speed * step + 1 / 2 * accel * step * step
            travelled += delta
            moves.append(round(delta))
        return moves

    def move_to_gap(self, slider, track):
        """Drag the slider horizontally along ``track`` and release it.

        :param slider: slider element to press and hold.
        :param track: sequence of horizontal offsets, applied one per move.
        :return: None
        """
        time.sleep(1)
        ActionChains(self.browser).click_and_hold(slider).perform()
        # Counting starts at 2, so the short pause happens on the ninth move.
        for step, offset in enumerate(track, start=2):
            if step == 10:
                time.sleep(0.2)
            ActionChains(self.browser).move_by_offset(xoffset=offset, yoffset=0).perform()
        time.sleep(1)
        ActionChains(self.browser).release().perform()
        time.sleep(1)

    def chick(self):
        """Log in, attempt the slider captcha, then run ``begin`` on success.

        Flow: fill and submit the login form, screenshot the captcha before
        and after clicking the slider, derive a drag distance from the two
        screenshots and drag. The page is then polled four times, one second
        apart: on '失败' the drag is repeated with a different fixed
        correction, on '重试' fresh screenshots are taken and the drag is
        redone, and otherwise the attempt is marked as passed.

        NOTE(review): the passed flag is never cleared, so one "neither
        marker present" poll counts as success even if a later poll fails.
        NOTE(review): on failure the method calls itself with no retry limit.
        """
        # Corrections subtracted from the raw gap column, ordered as
        # (200 < x < 330, x > 330, 100 < x < 200, everything else).
        first_try = (150, 170, 110, 70)
        after_failure = {
            0: (140, 160, 100, 40),
            1: (130, 150, 90, 50),
            2: (160, 180, 120, 65),
        }

        self.login()
        self.button()
        img1 = self.get_geetest_image()
        slide = self.get_slide()
        slide.click()
        img2 = self.get_geetest_image()
        distance = self._compensate(self.get_gap(img1, img2), first_try)
        self.move_to_gap(slide, self.get_track(distance))

        passed = False
        for attempt in range(4):
            time.sleep(1)
            if '失败' in self.browser.page_source:
                distance = self.get_gap(img1, img2)
                # The fourth poll (attempt 3) has no correction table and
                # uses the raw value as-is.
                if attempt in after_failure:
                    distance = self._compensate(distance, after_failure[attempt])
                self.move_to_gap(slide, self.get_track(distance))
                time.sleep(1)
            elif '重试' in self.browser.page_source:
                img1 = self.get_geetest_image()
                slide = self.get_slide()
                slide.click()
                img2 = self.get_geetest_image()
                distance = self._compensate(self.get_gap(img1, img2), first_try)
                print("xixixiixix")
                print(distance)
                self.move_to_gap(slide, self.get_track(distance))
                time.sleep(1)
            else:
                passed = True

        if passed:
            print("这里面进来了码")
            self.begin()
        else:
            self.chick()

    @staticmethod
    def _compensate(raw, corrections):
        """Subtract the correction matching the range ``raw`` falls in.

        Boundary values (exactly 100, 200 or 330) match none of the three
        ranges and therefore receive the last ("everything else") correction.
        """
        mid_high, high, mid_low, other = corrections
        if 330 > raw > 200:
            return raw - mid_high
        if raw > 330:
            return raw - high
        if 200 > raw > 100:
            return raw - mid_low
        return raw - other

    def begin(self):
        """Search each company name from the database and store its logo URL.

        Reads up to 200 ``oname`` values, searches each one through the
        page's header search box, extracts the first result's
        ``img/@data-src`` and writes it back to the ``img`` column.

        Changes from the previous version:
        * the UPDATE is parameterised, so names containing quotes no longer
          break (or inject into) the statement;
        * rows for which no image is found are skipped instead of storing
          the literal text ``[]``;
        * ``find_element(By.XPATH, ...)`` replaces the ``find_element_by_*``
          helpers removed in Selenium 4.3.
        """
        select_sql = """select top 200 oname from OrganizationInfo_similarityZhu where oname like '%公司%' and img is not null"""
        update_sql = "update OrganizationInfo_similarityZhu set img=%s where oname=%s"
        # The logo sits under div[3] or div[4] of the result page; try both.
        logo_xpaths = (
            "//*[@id='web-content']/div/div[1]/div[3]/div[2]/div[1]/div/div[2]/div/div[2]/img/@data-src",
            "//*[@id='web-content']/div/div[1]/div[4]/div[2]/div[1]/div/div[2]/div/div[2]/img/@data-src",
        )

        self.cur.execute(select_sql)
        for data in self.cur.fetchall():
            print('这里就没有动码')
            time.sleep(1)
            name = data[0]
            search_box = self.browser.find_element(
                By.XPATH, "//input[@id='header-company-search']")
            search_box.clear()
            search_box.send_keys(name)
            self.browser.find_element(
                By.XPATH, "//div[@class='input-group-btn btn -sm btn-primary']").click()

            response = etree.HTML(self.browser.page_source)
            matches = []
            for xpath in logo_xpaths:
                matches = response.xpath(xpath)
                if matches:
                    break
            if not matches:
                # Nothing to store; leave the existing row untouched.
                print('no logo found for', name)
                continue

            img = matches[0]
            print(update_sql, (img, name))
            self.cur.execute(update_sql, (img, name))
            self.conn.commit()


if __name__ == '__main__':
    # Script entry point: build the crawler and start the login/captcha flow.
    TianYanLogin().chick()



































计算下方拖动按钮的偏移量时花了很大功夫,但目前仍然不够完善。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值