selenium入门级项目 - 豆豆玩竞猜

本文介绍了使用Selenium自动化测试工具结合OCR技术处理网页弹窗及验证码识别,模拟登录并提交表格,以及抓取数据存储到MySQL数据库的过程。涉及的主要步骤包括:点击关闭弹窗、识别并输入验证码、模拟用户投注、数据收集与存储。此外,文章还展示了如何处理网页元素和截屏识别验证码的方法。
摘要由CSDN通过智能技术生成

在初步学习selenium的定位方法之后,我们就可以找些网站来测试了,这次我选择的网站是豆豆玩

测试目的

  1. 点掉首页弹窗

在这里插入图片描述

  2. 模拟登录,含简单的验证码识别
    在这里插入图片描述

  3. 表格提交
    在这里插入图片描述

  4. 获取15期结果,存进MySQL数据库

思路与主要代码

去JS弹窗

这个容易:按 class 名定位弹窗上的按钮,点击关闭即可:

# Path to the local chromedriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences
# (\P, \G, \C, \A, \c), which trigger a warning on modern Python versions.
s = Service(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(options=chrome_options, service=s)
ddwURL = "http://www.doudouwan.net/"
driver.get(url=ddwURL)
# Fixed wait before looking for the home-page pop-up.
time.sleep(6)
# Click the first element with class "layui-layer-btn0" to dismiss the pop-up.
# Guard against an empty result so a missing pop-up does not raise IndexError.
popup_buttons = driver.find_elements(by=By.CLASS_NAME, value='layui-layer-btn0')
if popup_buttons:
    popup_buttons[0].click()

模拟登录

这里填写账号密码登录的部分不再赘述,重点讨论验证码识别。

网站的验证码图片地址是 .html 格式,并且内容是随机的,每次打开网页都会刷新一次,例如:2578

我的思路是:在网页上截取验证码所在区域的图片,保存到本地,然后进行 OCR 文字识别。

这里不研究识别的原理,现在有很多开源库可供使用,我选择了 ddddocr,除了会输出广告信息之外,基本可用:

def validate(url):
    """Run ddddocr on an image file and return the recognised text.

    Args:
        url: Despite the name, this is a local file path; it is opened in
            binary mode and its bytes are handed to the OCR engine.

    Returns:
        Whatever ``DdddOcr.classification`` returns for the image bytes.
    """
    recognizer = ddddocr.DdddOcr()
    with open(url, 'rb') as image_file:
        raw_bytes = image_file.read()
    return recognizer.classification(raw_bytes)


def snipScreent(url):
    r"""Load *url*, screenshot one page element and return the PNG path.

    The browser window is first resized to the document's full scroll size.
    The element is located by a hard-coded XPath (per the surrounding
    article this is the captcha image). The screenshot is written to
    ``D:\image\<unix-seconds>.png``.

    Args:
        url: Address passed to ``driver.get``.

    Returns:
        The path of the saved screenshot.
    """
    driver.get(url)
    time.sleep(2)

    # Resize the window to the full document so the element is in view.
    page_width = driver.execute_script("return document.documentElement.scrollWidth")
    page_height = driver.execute_script("return document.documentElement.scrollHeight")
    driver.set_window_size(page_width, page_height)

    # Screenshot only the target element rather than the whole page.
    target = driver.find_element(by='xpath', value='/html/body/div[3]/div/div[2]/dl/dd[3]/img')
    print('网页模块尺寸:height={},width={}'.format(target.size['height'], target.size['width']))

    stamp = int(time.time())
    shot_path = r'D:\image\%s.png' % stamp
    target.screenshot(shot_path)
    return shot_path

# Screenshot the captcha element first; snipScreent loads ddwURL itself.
picPath = snipScreent(url=ddwURL)

# Every login control sits under the same <dl>; only the selector tail differs.
login_form = "body > div.top > div > div.index_member.no_login > dl"
driver.find_element(by=By.CSS_SELECTOR, value=login_form + " > dd.i.i_username > input").send_keys("bgone")
driver.find_element(by=By.CSS_SELECTOR, value=login_form + " > dd.i.i_password > input").send_keys("123456")

# Alternative kept from the original: download the captcha instead of screenshotting it.
# downURL = "http://doudouwan.net/register/register_ver_code.html"
# picPath = download(url=downURL)

# OCR the saved screenshot and type the recognised code into the captcha field.
num = validate(url=picPath)
driver.find_element(by=By.CSS_SELECTOR, value=login_form + " > dd.i.i_code > input").send_keys(num)
time.sleep(3)
driver.find_element(by=By.CSS_SELECTOR, value=login_form + " > dd.a > a.submit").click()
randomIdle()

表格提交

投注本身也不难,但我们需要写一个算法以尽量维持程序运行:

流程图(Created with Raphaël 2.3.0):Start → 初始投入100 → 等待结果 → 本局盈利? —yes→ End;—no→ 总共豆豆数 - 初始投入 * 2 > 0? —yes→ 初始投入 * 2 → 等待结果;—no→ End
def throw():
    """Click the ``#revoke_<id> > a`` link for the id shown in the left table.

    The id is the text content of the first cell in the sixth row of the
    left-hand table. Any exception is printed, followed by ``randomIdle()``
    and a page refresh, and the whole attempt is repeated until the click
    succeeds.
    """
    while True:
        try:
            id_cell = driver.find_element(
                by=By.CSS_SELECTOR,
                value=r"body > div.fun_main > div.fun_left > div.left_table > table > tbody > tr:nth-child(6) > td:nth-child(1)")
            round_id = id_cell.get_attribute("textContent")
            revoke_link = driver.find_element(by=By.CSS_SELECTOR, value="#revoke_%s > a" % round_id)
            revoke_link.click()
        except Exception as ep:
            # Best effort: report the failure, idle, reload and try again.
            print(ep)
            randomIdle()
            driver.refresh()
            continue
        return

def bet(input=0):
    """Type *input* into five stake fields, submit, and return the remainder.

    Args:
        input: Amount sent to each of the fields ``#tbLuck28Value1``, ``3``,
            ``5``, ``7`` and ``9``.

    Returns:
        ``int(getCoins()) - 5 * input``, with the balance read before any
        field is filled.
    """
    balance = getCoins()
    for slot in (1, 3, 5, 7, 9):
        field = driver.find_element(by=By.CSS_SELECTOR, value="#tbLuck28Value%d" % slot)
        field.send_keys(input)
    submit_link = driver.find_element(
        by=By.XPATH,
        value="/html/body/div[3]/div[1]/div[5]/div/div[3]/div[2]/div/div[3]/a")
    submit_link.click()
    return int(balance) - 5 * input

def getTimer():
    """Report whether the page shows both status messages checked below.

    Returns:
        True only when ``#bettingOverTime`` contains "已停止参与" and
        ``#bettingLottTime`` contains "解谜中,请稍后"; False otherwise.
    """
    lottery_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingLottTime").get_attribute("textContent")
    betting_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingOverTime").get_attribute("textContent")
    # Short-circuit keeps the original evaluation order of the two checks.
    return "已停止参与" in betting_text and "解谜中,请稍后" in lottery_text

# Outer driver loop.
# NOTE(review): `p` is never reassigned in the visible code, so this loop has
# no exit condition of its own — confirm whether running forever is intended.
p = False
while not p:
    # Every series of rounds starts again from this base stake.
    initialC = 20
    # Set to True when a round ends with c > 0; not read anywhere in the
    # visible code.
    mp = False
    for i in range(1,maxloop):
        # time.sleep(50)
        driver.get(url=guessURL90)
        throw()
        # Place the bet; `a` is bet()'s return value (balance read before
        # betting minus five times the stake).
        a = bet(input=initialC)

        # Poll every 2 seconds until getTimer() returns True.
        timerAssert = getTimer()
        while not timerAssert:
            time.sleep(2)
            timerAssert = getTimer()
        time.sleep(5)
        #
        # Reload the page and collect the result.
        driver.refresh()
        c = collection()
        print(c)
        if c < 0:
            # Negative result: double the per-field stake for the next round.
            initialC = initialC * 2
            # End this series when five fields at the new stake exceed a + c.
            if initialC*5 > (a+c):
                break
            time.sleep(5)
        elif c > 0:
            # Positive result: end this series.
            mp = True
            break

数据库操作

先在本地搭建 MySQL 服务器,用 Navicat Premium 15 建一张表 ddw,字段为:
在这里插入图片描述
主键设在 Number 上;然后由脚本获取数据并存入:


class Sql():
    """Thin pymysql helper for the ``ddw`` table in the ``ddw`` schema.

    Each query method opens its own connection through ``connectMysql`` and
    closes it before returning.
    """

    def __init__(self, host="localhost", username="root", password=""):
        """Store the connection settings.

        The original method was spelled ``__int__`` and therefore never ran as
        the constructor. The defaults mirror the values ``connectMysql``
        actually connected with, so ``Sql()`` behaves as before.

        Args:
            host: MySQL server host.
            username: MySQL user name.
            password: MySQL password.
        """
        self.host = host
        self.username = username
        self.password = password

    def connectMysql(self):
        """Open and return a new connection that yields rows as dicts."""
        connection = pymysql.connect(host=self.host,
                                     user=self.username,
                                     password=self.password,
                                     db='ddw',
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        return connection

    def instertMysql(self,num,date,result,coin,hits,inn,out):
        """Insert one result row, commit, then read it back and print it.

        Args:
            num: Value for the ``Number`` column (used for the read-back).
            date: Value for ``DateTime``.
            result: Value for ``Result``.
            coin: Value for ``Coins``.
            hits: Value for ``Hits``.
            inn: Value for ``In``.
            out: Value for ``Out``.
        """
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                # Create the new record.
                sql = "INSERT INTO `ddw`.`ddw` (`Number`, `DateTime`, `Result`, `Coins`, `Hits`, `In`, `Out`) VALUES (%s, %s, %s, %s, %s, %s, %s)"
                cursor.execute(sql, (num, date, result, coin, hits, inn, out))

            # Changes are not saved automatically, so commit explicitly.
            connection.commit()

            with connection.cursor() as cursor:
                # Read the record back; a separate local name avoids shadowing
                # the `result` parameter.
                sql = "SELECT Number FROM ddw.ddw WHERE Number=%s"
                cursor.execute(sql, (num,))
                stored = cursor.fetchone()
                print(stored)
        finally:
            connection.close()

    def selectMysql(self,index=0):
        """Fetch the newest row(s) of ``ddw`` ordered by ``Number`` descending.

        Fixes over the original, which could not run: the connection is now
        opened here, the column is quoted with backticks (quoting it as
        ``'Number'`` orders by a constant string), and the limit uses the
        ``%s`` placeholder.

        Args:
            index: Number of rows to fetch. A falsy value (the default) means
                the single newest row.

        Returns:
            The newest row (or None when the table is empty) when *index* is
            falsy; otherwise all fetched rows, at most *index* of them.
        """
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                sql = "SELECT * FROM ddw ORDER BY `Number` DESC LIMIT %s"
                if not index:
                    cursor.execute(sql, (1,))
                    result = cursor.fetchone()
                else:
                    # int() keeps the limit numeric so it is not sent quoted.
                    cursor.execute(sql, (int(index),))
                    result = cursor.fetchall()
                print(result)
                return result
        finally:
            connection.close()



def _parse_result_row(cells, year):
    """Turn the cells of one result row into typed column values.

    Args:
        cells: Up to seven cells, each a list of text fragments. Cells 0-4
            hold Number, DateTime, Result, Coins and Hits; cell 5 holds two
            fragments whose part after the last ":" is In and Out. A seventh
            cell, if present, is not read.
        year: Year prefixed to the DateTime text.

    Returns:
        ``(Number, DateTime, Result, Coins, Hits, In, Out)``. Coins stays a
        string with the thousands separators removed.

    Raises:
        ValueError: A numeric cell does not parse as an integer.
        IndexError: The row has fewer cells or fragments than expected.
    """
    number = int(cells[0][0])
    date_time = str(year) + "-" + cells[1][0]
    result = int(cells[2][0])
    coins = cells[3][0].replace(",", "")
    hits = int(cells[4][0].replace(",", ""))
    coins_in = int(cells[5][0].split(":")[-1].replace(",", ""))
    coins_out = int(cells[5][1].split(":")[-1].replace(",", ""))
    return number, date_time, result, coins, hits, coins_in, coins_out


def collection():
    """Scrape the result table, store unseen rows, return the first row's In - Out.

    The original returned, and closed the connection, inside the row loop, so
    only the first row was ever stored. All rows are now processed and the
    connection is closed once at the end. The return value is unchanged.

    Returns:
        ``In - Out`` of the first row, or None when the table yields no rows.

    Raises:
        ValueError, IndexError: The first row cannot be parsed. Parse errors
            in later rows are printed and the row is skipped.
    """
    year = datetime.datetime.now().year
    sql = Sql()

    # Collect the text fragments of every table cell.
    cells = []
    for node in driver.find_elements(by=By.XPATH, value="/html/body/div[3]/div[1]/div[5]/table/tbody/tr/td"):
        inner = node.get_attribute("innerHTML")
        try:
            parsed = etree.fromstring(inner, parser=etree.HTMLParser())
            cells.append(parsed.xpath("//text()"))
        except Exception:
            # Unparseable or empty cell: keep its raw inner HTML instead.
            cells.append([inner])

    # Skip the leading cells and the last one, as the original did; the
    # remainder is read in groups of seven cells per row.
    useful = cells[36:-1]

    profit = None
    connection = sql.connectMysql()
    try:
        for start in range(0, len(useful), 7):
            try:
                row = _parse_result_row(useful[start:start + 7], year)
            except (ValueError, IndexError) as ep:
                if start == 0:
                    # The first row supplies the return value, so fail loudly.
                    raise
                print(ep)
                continue
            if start == 0:
                profit = row[5] - row[6]

            try:
                with connection.cursor() as cursor:
                    cursor.execute("SELECT Number FROM ddw.ddw WHERE Number=%s", (row[0],))
                    existing = cursor.fetchone()
                print(existing)
                if not existing:
                    sql.instertMysql(*row)
            except Exception as ep:
                # Storage is best effort; a database error must not lose the result.
                print(ep)
    finally:
        connection.close()

    return profit

结果展示:
在这里插入图片描述

总结

难度系数低,可以获取数据用于日后数据分析

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值