Python+Selenium实现自动登录163邮箱导出邮件可按时间和内容进行筛选

老中医er

已于 2022-07-13 15:24:11 修改

阅读量1.8k

点赞数

分类专栏：个人Python日记 Python从入门到精通文章标签： python selenium 爬虫

于 2022-06-14 16:44:57 首次发布

本文链接：https://blog.csdn.net/yongguo6816/article/details/125280411

版权

Python从入门到精通同时被 2 个专栏收录

3 篇文章 0 订阅

订阅专栏

个人Python日记

2 篇文章 1 订阅

订阅专栏

文章目录

描述
一、打码平台
二、实现方法
- 1. 过点选验证
- 2. 全部代码
总结
备注

描述

此脚本可以实现网易163邮箱的自动登录，自动过点选验证，按时间和关键字筛选内容，最后导出邮件

一、打码平台

因为我们要过网易易盾的点选验证，所以需要用到打码平台。我之前也做过 OCR 识别并返回文字坐标的功能，但可能是技术不到家，很多字识别不了，所以还是用打码平台吧。哪家都行，随便选。

二、实现方法

1. 过点选验证

主要难点就是这里了，重点说下这里

        def verification(self, max_attempts=20, scale=0.71):
            """Pass the Yidun click-the-characters captcha, then click login again.

            Waits up to 3 seconds for the captcha prompt (``yidun_tips``).  If it
            never becomes visible the timeout traceback is printed and the method
            returns.  Otherwise the prompt is clicked and the image challenge is
            attempted up to ``max_attempts`` times.

            Args:
                max_attempts: Maximum number of challenge attempts.  The retry
                    loop used to be unbounded, so a persistent failure (captcha
                    service down, unrecognised character, ...) never returned.
                scale: Displayed image size divided by downloaded image size.
                    The author measured 0.71; it converts the coordinates
                    returned by ``self.verify_code`` into on-page pixels.
            """
            try:
                # Raises (and lands in the handler below) when no captcha was triggered.
                WebDriverWait(self.driver, 3).until(
                    EC.visibility_of_any_elements_located((By.XPATH, '//*[@class="yidun_tips"]')))
                time.sleep(random.uniform(0.5, 2))
                prompt_button = self.driver.find_element(By.XPATH, '//*[@class="yidun_tips"]')
                # Hover the captcha prompt and click it to open the challenge.
                ActionChains(self.driver).move_to_element(prompt_button).click().perform()
                # The downloaded challenge images are written into this folder.
                os.makedirs('img', exist_ok=True)
            except Exception:
                traceback.print_exc()
                return
            for _ in range(max_attempts):
                try:
                    if self._solve_challenge(scale):
                        return
                except Exception:
                    # Any failure means "try again"; unlike a bare except this
                    # lets KeyboardInterrupt / SystemExit propagate.
                    traceback.print_exc()
                    time.sleep(1)

        def _solve_challenge(self, scale):
            """Run one challenge attempt; return True once the captcha is gone and login was clicked."""
            background_xpath = '//*[@class="yidun_bg-img"]'
            WebDriverWait(self.driver, 3).until(
                EC.visibility_of_any_elements_located((By.XPATH, background_xpath)))
            background = self.driver.find_element(By.XPATH, background_xpath)
            # URL of the challenge image.
            validation_img = background.get_attribute('src')
            # Characters to click, in order, with quotes and spaces stripped.
            validation_text = self.driver.find_element(By.XPATH, '//*[@class="yidun_tips__point"]').text
            validation_text = validation_text.replace('"', '').replace(' ', '')
            # Displayed size of the image, needed to locate its centre.
            validation_size = background.size
            # Park the pointer on the image; every later move is relative to it.
            ActionChains(self.driver).move_to_element(background).perform()
            time.sleep(random.uniform(0.5, 2))
            time.sleep(random.uniform(0.5, 2))
            # Download the image so it can be sent to the captcha-solving service.
            r = requests.get(validation_img, timeout=30)
            r.raise_for_status()
            fname = 'img/' + self.username + str(int(time.time())) + '.jpg'
            with open(fname, 'wb') as f:
                f.write(r.content)
            print(f'{validation_img}写入成功')
            # Maps each character to its coordinates in the downloaded image.
            verify_data = self.verify_code(fname)
            # move_to_element leaves the pointer at the centre of the image, so
            # that is the starting position (measured from the top-left corner).
            pointer_x = validation_size['width'] / 2
            pointer_y = validation_size['height'] / 2
            for i in validation_text:
                # Integer distance from the real pointer position to the
                # character, after scaling to the displayed image size.
                text_location_x = round(verify_data[i]['X坐标值'] * scale - pointer_x)
                text_location_y = round(verify_data[i]['Y坐标值'] * scale - pointer_y)
                print(i)
                self._human_like_move(text_location_x, text_location_y)
                # Small random offset so the click is not pixel-perfect.
                jitter_x = random.randint(-5, 5)
                jitter_y = random.randint(-5, 5)
                ActionChains(self.driver).move_by_offset(jitter_x, jitter_y).click().perform()
                # Track where the pointer really is, jitter included, so the
                # error does not accumulate over successive characters.
                pointer_x += text_location_x + jitter_x
                pointer_y += text_location_y + jitter_y
                time.sleep(random.uniform(0.3, 1.5))
            time.sleep(3)
            try:
                # Still able to hover the image: the challenge was not accepted.
                ActionChains(self.driver).move_to_element(
                    self.driver.find_element(By.XPATH, background_xpath)).perform()
            except Exception:
                # The image is gone, which is treated as success: log in again.
                time.sleep(1)
                self.driver.find_element(By.ID, 'dologin').click()
                return True
            return False

        def _human_like_move(self, text_location_x, text_location_y):
            """Move the pointer by the given integer offsets in small random steps."""
            move_lenx = 0
            move_leny = 0
            # "!=" also terminates immediately for a zero offset on an axis,
            # which the former sign-based exit conditions never matched.
            while move_lenx != text_location_x or move_leny != text_location_y:
                move_x = self._step_toward(text_location_x - move_lenx)
                move_y = self._step_toward(text_location_y - move_leny)
                ActionChains(self.driver).move_by_offset(move_x, move_y).perform()
                move_lenx += move_x
                move_leny += move_y
                print(text_location_y, move_leny, 'yyyyyyyyyyyyyyy')
                print(text_location_x, move_lenx, 'xxxxxxxxxxxxxxx')

        @staticmethod
        def _step_toward(remaining):
            """Return a signed integer step that shrinks as the target gets close."""
            distance = abs(remaining)
            if distance == 0:
                return 0
            if distance > 10:
                step = random.randint(5, 10)
            elif distance > 5:
                step = random.randint(2, 5)
            else:
                # Never overshoot: at most the distance that is left.
                step = random.randint(1, min(2, distance))
            return -step if remaining < 0 else step

该处先通过网络请求下载验证图片，再调用打码平台接口获取各个文字的坐标。

2. 全部代码

# encoding:utf-8
import json
import re
import time
import traceback
import random
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
import requests
import base64
import os
from setting import base_path
import zipfile
from selenium.webdriver.common.keys import Keys


class email_163:
    def __init__(self, username, password, is_search=False, start_date=False, end_date=False):
        self.username = username
        self.password = password
        self.driver = self.get_driver()
        self.is_search = is_search
        self.is_date = start_date
        self.end_data = end_date

    def time_disposal(self, time_str):
        """Convert a ``YYYYMMDD`` string into an integer timestamp.

        The string is parsed with the ``%Y%m%d`` format and converted with
        ``time.mktime``, i.e. it is interpreted in the local time zone.
        """
        return int(time.mktime(time.strptime(time_str, "%Y%m%d")))

    def create_file(self):
        """Make sure the per-account download folder exists and return its path.

        Returns:
            The folder path as a string: ``base_path``, then ``mails``, then the
            user name, joined with backslashes and ending in a backslash.
        """
        userfile = base_path + '\\' + 'mails' + '\\' + self.username + '\\'
        try:
            # makedirs also creates the intermediate "mails" folder, which
            # os.mkdir cannot do; exist_ok makes repeated calls harmless.
            os.makedirs(userfile, exist_ok=True)
        except OSError:
            # Still best-effort, but the reason is no longer hidden.
            traceback.print_exc()
        return userfile

    def verify_code(self, fname, timeout=30):
        """Send a captcha image to the solving service and return its JSON reply.

        Args:
            fname: Path of the image file to upload.
            timeout: Seconds to wait for the service before giving up; without
                it a stalled connection would block forever.

        Returns:
            The decoded JSON response.  ``verification`` indexes it by
            character and reads the ``X坐标值`` / ``Y坐标值`` entries.

        Raises:
            requests.RequestException: On network errors, timeouts or an HTTP
                error status.
        """
        with open(fname, 'rb') as f:  # read the local image as raw bytes
            encodestr = str(base64.b64encode(f.read()), 'utf-8')
        url = 'http://www.tulingtech.xyz/tuling/predict'
        headers = {
            'Content-Type': 'application/json;charset=UTF-8'
        }
        # The 'x' values are placeholders for the account details of the
        # solving service.
        payload = {
            'username': 'x',
            'password': 'x',
            'b64': encodestr,
            'ID': 'x',
            'developer': 'x'
        }
        res = requests.post(url, data=json.dumps(payload), headers=headers, timeout=timeout)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        res.raise_for_status()
        json_data = json.loads(res.text)
        print(json_data)
        return json_data


    def get_driver(self):
        """Start Chrome with the account's download folder and return the driver.

        The browser is configured to download into the folder returned by
        ``create_file``, with the automation switches disabled, and the script
        in ``file/text.js`` is registered to run on every new document
        (presumably an anti-detection script; its content is not part of this
        file).
        """
        # Imported here so the block is self-contained; selenium is already a
        # dependency of this file.
        from selenium.webdriver.chrome.service import Service

        userfile = self.create_file()
        prefs = {"download.default_directory": userfile}
        option = ChromeOptions()
        option.add_experimental_option("prefs", prefs)
        option.add_argument('disable-infobars')
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_argument("--disable-blink-features=AutomationControlled")
        # Passing the driver path positionally is deprecated in Selenium 4 and
        # no longer accepted by recent releases; a Service object is the
        # supported way to point at a specific chromedriver binary.
        driver = webdriver.Chrome(service=Service('file/chromedriver.exe'), options=option)
        with open('file/text.js') as f:
            js = f.read()

        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": js
        })
        driver.maximize_window()
        return driver

    def verification(self, max_attempts=20, scale=0.71):
        """Pass the Yidun click-the-characters captcha, then click login again.

        Waits up to 3 seconds for the captcha prompt (``yidun_tips``).  If it
        never becomes visible the timeout traceback is printed and the method
        returns.  Otherwise the prompt is clicked and the image challenge is
        attempted up to ``max_attempts`` times.

        Args:
            max_attempts: Maximum number of challenge attempts.  The retry
                loop used to be unbounded, so a persistent failure (captcha
                service down, unrecognised character, ...) never returned.
            scale: Displayed image size divided by downloaded image size.
                The author measured 0.71; it converts the coordinates
                returned by ``self.verify_code`` into on-page pixels.
        """
        try:
            # Raises (and lands in the handler below) when no captcha was triggered.
            WebDriverWait(self.driver, 3).until(
                EC.visibility_of_any_elements_located((By.XPATH, '//*[@class="yidun_tips"]')))
            time.sleep(random.uniform(0.5, 2))
            prompt_button = self.driver.find_element(By.XPATH, '//*[@class="yidun_tips"]')
            # Hover the captcha prompt and click it to open the challenge.
            ActionChains(self.driver).move_to_element(prompt_button).click().perform()
            # The downloaded challenge images are written into this folder.
            os.makedirs('img', exist_ok=True)
        except Exception:
            traceback.print_exc()
            return
        for _ in range(max_attempts):
            try:
                if self._solve_challenge(scale):
                    return
            except Exception:
                # Any failure means "try again"; unlike a bare except this
                # lets KeyboardInterrupt / SystemExit propagate.
                traceback.print_exc()
                time.sleep(1)

    def _solve_challenge(self, scale):
        """Run one challenge attempt; return True once the captcha is gone and login was clicked."""
        background_xpath = '//*[@class="yidun_bg-img"]'
        WebDriverWait(self.driver, 3).until(
            EC.visibility_of_any_elements_located((By.XPATH, background_xpath)))
        background = self.driver.find_element(By.XPATH, background_xpath)
        # URL of the challenge image.
        validation_img = background.get_attribute('src')
        # Characters to click, in order, with quotes and spaces stripped.
        validation_text = self.driver.find_element(By.XPATH, '//*[@class="yidun_tips__point"]').text
        validation_text = validation_text.replace('"', '').replace(' ', '')
        # Displayed size of the image, needed to locate its centre.
        validation_size = background.size
        # Park the pointer on the image; every later move is relative to it.
        ActionChains(self.driver).move_to_element(background).perform()
        time.sleep(random.uniform(0.5, 2))
        time.sleep(random.uniform(0.5, 2))
        # Download the image so it can be sent to the captcha-solving service.
        r = requests.get(validation_img, timeout=30)
        r.raise_for_status()
        fname = 'img/' + self.username + str(int(time.time())) + '.jpg'
        with open(fname, 'wb') as f:
            f.write(r.content)
        print(f'{validation_img}写入成功')
        # Maps each character to its coordinates in the downloaded image.
        verify_data = self.verify_code(fname)
        # move_to_element leaves the pointer at the centre of the image, so
        # that is the starting position (measured from the top-left corner).
        pointer_x = validation_size['width'] / 2
        pointer_y = validation_size['height'] / 2
        for i in validation_text:
            # Integer distance from the real pointer position to the
            # character, after scaling to the displayed image size.
            text_location_x = round(verify_data[i]['X坐标值'] * scale - pointer_x)
            text_location_y = round(verify_data[i]['Y坐标值'] * scale - pointer_y)
            print(i)
            self._human_like_move(text_location_x, text_location_y)
            # Small random offset so the click is not pixel-perfect.
            jitter_x = random.randint(-5, 5)
            jitter_y = random.randint(-5, 5)
            ActionChains(self.driver).move_by_offset(jitter_x, jitter_y).click().perform()
            # Track where the pointer really is, jitter included, so the
            # error does not accumulate over successive characters.
            pointer_x += text_location_x + jitter_x
            pointer_y += text_location_y + jitter_y
            time.sleep(random.uniform(0.3, 1.5))
        time.sleep(3)
        try:
            # Still able to hover the image: the challenge was not accepted.
            ActionChains(self.driver).move_to_element(
                self.driver.find_element(By.XPATH, background_xpath)).perform()
        except Exception:
            # The image is gone, which is treated as success: log in again.
            time.sleep(1)
            self.driver.find_element(By.ID, 'dologin').click()
            return True
        return False

    def _human_like_move(self, text_location_x, text_location_y):
        """Move the pointer by the given integer offsets in small random steps."""
        move_lenx = 0
        move_leny = 0
        # "!=" also terminates immediately for a zero offset on an axis,
        # which the former sign-based exit conditions never matched.
        while move_lenx != text_location_x or move_leny != text_location_y:
            move_x = self._step_toward(text_location_x - move_lenx)
            move_y = self._step_toward(text_location_y - move_leny)
            ActionChains(self.driver).move_by_offset(move_x, move_y).perform()
            move_lenx += move_x
            move_leny += move_y
            print(text_location_y, move_leny, 'yyyyyyyyyyyyyyy')
            print(text_location_x, move_lenx, 'xxxxxxxxxxxxxxx')

    @staticmethod
    def _step_toward(remaining):
        """Return a signed integer step that shrinks as the target gets close."""
        distance = abs(remaining)
        if distance == 0:
            return 0
        if distance > 10:
            step = random.randint(5, 10)
        elif distance > 5:
            step = random.randint(2, 5)
        else:
            # Never overshoot: at most the distance that is left.
            step = random.randint(1, min(2, distance))
        return -step if remaining < 0 else step

    def get_email(self):
        """Export the listed mails page by page, then unzip the downloads.

        Filters come from the constructor arguments:

        * keyword and start date: search for the keyword, then tick only the
          rows whose date lies within ``[self.is_date, self.end_data]``;
        * keyword only: search, then export every page;
        * start date only: open the date-filtered list via ``search_date``,
          then export every page;
        * neither: export every page of the current list.

        Raises:
            ValueError: In keyword-plus-date mode, when ``self.is_date`` or
                ``self.end_data`` is not a valid ``YYYYMMDD`` string.
        """
        if self.is_search and self.is_date:
            self.serach(self.is_search)
            self.driver.refresh()
            # This branch closes the window itself when it runs out of pages,
            # so nothing may touch the driver afterwards.
            self._export_pages_in_date_range()
        else:
            if self.is_search:
                self.serach(self.is_search)
            elif self.is_date:
                self.search_date(f'{self.is_date}:{self.end_data}')
            self.driver.refresh()
            time.sleep(3)
            self._export_all_pages()
        self.decompression()

    def _export_pages_in_date_range(self):
        """Tick the rows dated within the configured range on every page and export them."""
        # Loop-invariant bounds, computed once.
        start_ts = self.time_disposal(self.is_date)
        end_ts = self.time_disposal(self.end_data)
        fly_id = 0
        while True:
            try:
                # An element with id "fly<N>" is expected on the N-th page.
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, f'//*[@id="fly{fly_id}"]')))
            except Exception:
                self.driver.close()
                break
            # Rows 2..21 of the list container are inspected on each page.
            for num in range(2, 22):
                row_xpath = f'//*[@class="Dg0"]/div[3]/div[{num}]'
                try:
                    title = self.driver.find_elements(
                        By.XPATH, row_xpath + '/div/div[3]/div[1]')[0].get_attribute('title')
                    year, month, day = re.findall(r'(\d+)年(\d+)月(\d+)日', title)[0]
                    # Zero-pad before parsing: joined as-is, "2022年1月11日"
                    # becomes "2022111", which parses as November 1st.
                    email_date = self.time_disposal(f'{int(year):04d}{int(month):02d}{int(day):02d}')
                    if start_ts <= email_date <= end_ts:
                        checkbox = self.driver.find_element(By.XPATH, row_xpath + '//label')
                        self.driver.execute_script("arguments[0].click();", checkbox)
                except Exception:
                    # A missing or unparsable row is skipped, not fatal.
                    traceback.print_exc()
            # Open the last toolbar menu and choose "export selected mails".
            more_button = self.driver.find_elements(
                By.XPATH, '//*[@role="toolbar"]/div[4]/div[last()]')[1]
            self.driver.execute_script("arguments[0].click();", more_button)
            WebDriverWait(self.driver, 10).until(
                EC.visibility_of_any_elements_located((By.XPATH, "//span[text()='导出选中邮件']")))
            self.driver.find_element(By.XPATH, "//span[text()='导出选中邮件']").click()
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, "//span[text()='下一页']")))
            except Exception:
                # No "next page" control: this was the last page.  Previously
                # this timeout escaped and the downloads were never unzipped.
                traceback.print_exc()
                self.driver.close()
                break
            next_button = self.driver.find_element(By.XPATH, "//span[text()='下一页']")
            self.driver.execute_script("arguments[0].click();", next_button)
            fly_id += 1

    def _export_all_pages(self):
        """Click the per-page "fly<N>" control and export, for every page of the list."""
        fly_id = 0
        while True:
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, f'//*[@id="fly{fly_id}"]')))
            except Exception:
                self.driver.close()
                break
            # Presumably the select-all checkbox of the page -- TODO confirm.
            self.driver.find_element(By.XPATH, f'//*[@id="fly{fly_id}"]').click()
            self.driver.find_element(By.XPATH, '//*[@role="toolbar"]/div[4]/div[last()]').click()
            WebDriverWait(self.driver, 10).until(
                EC.visibility_of_any_elements_located((By.XPATH, "//span[text()='导出选中邮件']")))
            self.driver.find_element(By.XPATH, "//span[text()='导出选中邮件']").click()
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, "//span[text()='下一页']")))
            except Exception:
                traceback.print_exc()
                self.driver.close()
                break
            self.driver.find_element(By.XPATH, "//span[text()='下一页']").click()
            fly_id += 1


    def decompression(self):
        userfile = self.create_file()
        decompression_file = userfile + 'decompression' + '\\'
        try:
            os.mkdir(decompression_file)
        except:
            pass
        file_dir = userfile
        for root, dirs, files in os.walk(file_dir, topdown=False):
            for i in files:
                zip_path = userfile + i
                f = zipfile.ZipFile(zip_path, 'a')
                for zip_file in f.namelist():
                    try:
                        f.extract(zip_file, decompression_file)
                    except:
                        traceback.print_exc()
                f.close()

    def email_main(self):
        """Log in to mail.163.com, handle the captcha if shown, then start the export."""
        browser = self.driver

        def pause():
            # Random delay between the individual UI actions.
            time.sleep(random.uniform(0.5, 2))

        def wait_visible(xpath):
            WebDriverWait(browser, 10).until(
                EC.visibility_of_any_elements_located((By.XPATH, xpath)))

        browser.get('https://mail.163.com/')

        # The login form lives inside an iframe under #loginDiv.
        frame_xpath = '//*[@id="loginDiv"]/iframe'
        wait_visible(frame_xpath)
        pause()
        browser.switch_to.frame(browser.find_element(By.XPATH, frame_xpath))

        # Fill in the credentials and submit.
        wait_visible('//*[@name="email"]')
        pause()
        browser.find_element(By.XPATH, '//*[@name="email"]').send_keys(self.username)
        pause()
        browser.find_element(By.XPATH, '//*[@name="password"]').send_keys(self.password)
        pause()
        browser.find_element(By.ID, 'dologin').click()

        self.verification()

        # Logged in once the tab bar is visible; open its fourth tab.
        tabs_xpath = '//*[@id="dvMultiTabWrapper"]'
        wait_visible(tabs_xpath)
        browser.find_element(By.XPATH, tabs_xpath + '//ul/li[4]').click()
        self.get_email()

    def serach(self, key):
        """Type *key* into the mailbox search box and submit it with ENTER.

        Fixed pauses (3 s before, 5 s after each keystroke batch) give the
        page time to react.
        """
        search_box = (By.XPATH, '//*[@role="search"]//input')
        time.sleep(3)
        WebDriverWait(self.driver, 10).until(
            EC.visibility_of_any_elements_located(search_box))
        # First the keyword, then ENTER; the input is looked up again each time.
        for keys in (key, Keys.ENTER):
            self.driver.find_element(*search_box).send_keys(keys)
            time.sleep(5)

    def search_date(self, date):
        now_url = self.driver.current_url
        print(now_url)
        user_sid = re.findall('sid=(.*?)&', now_url)[0]
        next_url = 'https://mail.163.com/js6/main.jsp?sid=%s&df=mail163_letter#module=mbox.ListModule|{"order":"date","desc":true,"fids":[1],"filter":{"sentDate":"%s"}}' % (user_sid, date)
        self.driver.get(next_url)


# Run the export only when executed as a script, so that importing this module
# does not launch a browser and log in.
if __name__ == '__main__':
    # Placeholder credentials: replace them before running.
    username = 'xx'
    password = 'xx'
    get_email = email_163(username, password, is_search='在', start_date='20220101', end_date='20220614')
    get_email.email_main()