描述
此脚本可以实现网易163邮箱的自动登录,自动过点选验证,按时间和关键字筛选内容,最后导出邮件
一、打码平台
因为我们要过网易易盾的点选验证,所以需要用到打码平台。我之前也做过 OCR 识别并返回文字坐标的功能,但可能是技术不到家,很多字识别不了,所以还是用打码平台吧。哪家都行,随便选。
二、实现方法
1. 过点选验证
主要难点就是这里了,重点说下这里
def verification(self):
try:
# 这个地方是判断验证是否触发,如果没触发的话就会报错跳出方法
WebDriverWait(self.driver, 3).until(EC.visibility_of_any_elements_located((By.XPATH, '//*[@class="yidun_tips"]')))
time.sleep(random.uniform(0.5, 2))
validation_elenment = self.driver.find_element(By.XPATH, '//*[@class="yidun_tips"]')
# 将鼠标移动到验证按钮上并点击
ActionChains(self.driver).move_to_element(validation_elenment).click().perform()
# 一个无限循环知道验证通过
while True:
try:
WebDriverWait(self.driver, 3).until(
EC.visibility_of_any_elements_located((By.XPATH, '//*[@class="yidun_bg-img"]')))
# 获取验证图片
validation_img = self.driver.find_element(By.XPATH, '//*[@class="yidun_bg-img"]').get_attribute('src')
# 获取要点击的字
validation_text = self.driver.find_element(By.XPATH, '//*[@class="yidun_tips__point"]').text
validation_text = validation_text.replace('"', '').replace(' ', '')
validation_location = self.driver.find_element(By.XPATH, '//*[@class="yidun_bg-img"]').location
# 获取图片长宽用来计算坐标
validation_size = self.driver.find_element(By.XPATH, '//*[@class="yidun_bg-img"]').size
validation_height = validation_size['height']
validation_width = validation_size['width']
# 获取图片链接并下载
ActionChains(self.driver).move_to_element(
self.driver.find_element(By.XPATH, '//*[@class="yidun_bg-img"]')).perform()
time.sleep(random.uniform(0.5, 2))
time.sleep(random.uniform(0.5, 2))
r = requests.get(validation_img)
fname = 'img/' + self.username + str(int(time.time())) + '.jpg'
with open(fname, 'wb') as f:
f.write(r.content)
print(f'{validation_img}写入成功')
# 这里是调用打码平台返回坐标的
verify_data = self.verify_code(fname)
count = 1
x = 0
y = 0
# 注意,下边的*0.71是我发现页面实际显示的图片大小是我下载的图片的百分之七十一,所以我们要换算一下
for i in validation_text:
# 因为我们获取的图片的坐标实际伤势图片中心点的坐标,所以我们计算要移动的距离是要用文字坐标减去图片长宽/2
# 第一次的起点是图片中心点,所以要判断一下,然后x和y要加上这次移动的坐标
if count == 1:
text_location_x = (verify_data[i]['X坐标值'] * 0.71) - (validation_width / 2)
text_location_y = (verify_data[i]['Y坐标值'] * 0.71) - (validation_height / 2)
x = (validation_width / 2) + text_location_x
y = (validation_height / 2) + text_location_y
# 从上次移动到的地方开始移动
else:
text_location_x = (verify_data[i]['X坐标值'] * 0.71) - x
text_location_y = (verify_data[i]['Y坐标值'] * 0.71) - y
x += text_location_x
y += text_location_y
print(i)
move_lenx = 0
move_leny = 0
"""以下是我自己写的模拟人类鼠标移动的方法,自测通过率百分之八十以上,可以直接复制"""
while True:
if text_location_x < 0:
if move_lenx <= text_location_x:
move_x = 0
else:
if text_location_x - move_lenx < -10:
move_x = random.uniform(-10, -5)
elif text_location_x - move_lenx < -5:
move_x = random.uniform(-5, -2)
else:
move_x = random.uniform(-2, 0)
else:
if move_lenx >= text_location_x:
move_x = 0
else:
if text_location_x - move_lenx > 10:
move_x = random.uniform(5, 10)
elif text_location_x - move_lenx > 5:
move_x = random.uniform(2, 5)
else:
move_x = random.uniform(0, 2)
if text_location_y < 0:
if move_leny <= text_location_y:
move_y = 0
else:
if text_location_y - move_leny < -10:
move_y = random.uniform(-10, -5)
elif text_location_y - move_leny < -5:
move_y = random.uniform(-5, -2)
else:
move_y = random.uniform(-2, 0)
else:
if move_leny >= text_location_y:
move_y = 0
else:
if text_location_y - move_leny > 10:
move_y = random.uniform(5, 10)
elif text_location_y - move_leny > 5:
move_y = random.uniform(2, 5)
else:
move_y = random.uniform(0, 2)
if text_location_x < 0 and text_location_y < 0:
if (move_lenx <= text_location_x) and (move_leny <= text_location_y):
break
elif text_location_x > 0 and text_location_y > 0:
if (move_lenx >= text_location_x) and (move_leny >= text_location_y):
break
elif text_location_x > 0 and text_location_y < 0:
if (move_lenx >= text_location_x) and (move_leny <= text_location_y):
break
elif text_location_x < 0 and text_location_y > 0:
if (move_lenx <= text_location_x) and (move_leny >= text_location_y):
break
ActionChains(self.driver).move_by_offset(move_x, move_y).perform()
move_lenx += move_x
move_leny += move_y
print(text_location_y, move_leny, 'yyyyyyyyyyyyyyy')
print(text_location_x, move_lenx, 'xxxxxxxxxxxxxxx')
# if count != 1:
# ActionChains(driver).move_by_offset(-50, 0).click().perform()
# else:
# ActionChains(driver).move_by_offset(0, 0).click().perform()
ActionChains(self.driver).move_by_offset(random.uniform(-5, 5), random.uniform(-5, 5)).click().perform()
time.sleep(random.uniform(0.3, 1.5))
count += 1
time.sleep(3)
try:
ActionChains(self.driver).move_to_element(
self.driver.find_element(By.XPATH, '//*[@class="yidun_bg-img"]')).perform()
except:
time.sleep(1)
self.driver.find_element(By.ID, 'dologin').click()
break
except:
time.sleep(1)
except:
traceback.print_exc()
该处通过 URL 网络请求下载验证码图片,再调用打码平台接口获取各个文字的坐标。
2. 全部代码
# encoding:utf-8
import json
import re
import time
import traceback
import random
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
import requests
import base64
import os
from setting import base_path
import zipfile
from selenium.webdriver.common.keys import Keys
class email_163:
    """Selenium automation that logs in to mail.163.com and exports mail.

    Flow: open the login page, submit the credentials, solve the Yidun
    click-captcha through an external recognition service, optionally
    filter by keyword and/or date range, export the mails page by page and
    finally unpack the downloaded archives.
    """

    def __init__(self, username, password, is_search=False, start_date=False, end_date=False):
        """Store the settings and start the browser.

        Args:
            username: Mailbox account; also names the download folder and
                the saved captcha images.
            password: Mailbox password.
            is_search: Keyword typed into the search box, or False.
            start_date: First day to export as 'YYYYMMDD', or False.
            end_date: Last day to export as 'YYYYMMDD', or False.
        """
        self.username = username
        self.password = password
        self.driver = self.get_driver()
        self.is_search = is_search
        self.is_date = start_date
        self.end_data = end_date

    def time_disposal(self, time_str):
        """Convert a 'YYYYMMDD' string to a local-time Unix timestamp (int)."""
        return int(time.mktime(time.strptime(time_str, "%Y%m%d")))

    def create_file(self):
        """Create (if needed) and return this user's download folder.

        Returns:
            ``<base_path>/mails/<username>/`` with a trailing separator, so
            callers can append a file name directly.
        """
        userfile = os.path.join(base_path, 'mails', self.username, '')
        # makedirs also creates a missing 'mails' parent, which a plain
        # mkdir could not do.
        os.makedirs(userfile, exist_ok=True)
        return userfile

    def verify_code(self, fname):
        """Send a captcha image to the recognition service.

        Args:
            fname: Path of the image file to upload.

        Returns:
            The decoded JSON reply. ``verification`` indexes it by
            character and reads the 'X坐标值' / 'Y坐标值' entries.
        """
        with open(fname, 'rb') as f:
            image_b64 = str(base64.b64encode(f.read()), 'utf-8')
        url = 'http://www.tulingtech.xyz/tuling/predict'
        headers = {
            'Content-Type': 'application/json;charset=UTF-8'
        }
        # Placeholder account values; fill in real ones for the service.
        payload = {
            'username': 'x',
            'password': 'x',
            'b64': image_b64,
            'ID': 'x',
            'developer': 'x'
        }
        res = requests.post(url, data=json.dumps(payload), headers=headers, timeout=30)
        json_data = json.loads(res.text)
        print(json_data)
        return json_data

    def get_driver(self):
        """Start Chrome with downloads redirected to the user's folder."""
        userfile = self.create_file()
        prefs = {"download.default_directory": userfile}
        option = ChromeOptions()
        option.add_experimental_option("prefs", prefs)
        # Switches that reduce the visible signs of browser automation.
        option.add_argument('disable-infobars')
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_argument("--disable-blink-features=AutomationControlled")
        # NOTE(review): the positional driver path is not accepted by newer
        # Selenium releases - confirm the installed version.
        driver = webdriver.Chrome('file/chromedriver.exe', options=option)
        with open('file/text.js') as f:
            js = f.read()
        # Evaluate the script in every new document before page scripts.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": js
        })
        driver.maximize_window()
        return driver

    def verification(self):
        """Solve the Yidun click-captcha shown after the login click, if any.

        Waits up to 3 seconds for the captcha widget. When it shows up, the
        captcha image is downloaded into ``img/``, passed to
        ``self.verify_code`` and every character of the prompt is clicked
        in order. The attempt repeats until the captcha image can no longer
        be hovered; the login button is then clicked again.

        Exceptions are printed rather than raised, so a login without
        captcha simply prints the timeout traceback and returns.
        """
        tips_xpath = '//*[@class="yidun_tips"]'
        image_xpath = '//*[@class="yidun_bg-img"]'
        # The author measured the on-page image at 71% of the downloaded
        # image's size; service coordinates are scaled by this factor.
        scale = 0.71
        try:
            # Times out (and leaves through the outer handler) when the
            # captcha was not triggered.
            WebDriverWait(self.driver, 3).until(
                EC.visibility_of_any_elements_located((By.XPATH, tips_xpath)))
            time.sleep(random.uniform(0.5, 2))
            tips = self.driver.find_element(By.XPATH, tips_xpath)
            # Hover the verification button and click it.
            ActionChains(self.driver).move_to_element(tips).click().perform()
            # Retry until the captcha is accepted.
            while True:
                try:
                    WebDriverWait(self.driver, 3).until(
                        EC.visibility_of_any_elements_located((By.XPATH, image_xpath)))
                    image = self.driver.find_element(By.XPATH, image_xpath)
                    validation_img = image.get_attribute('src')
                    # Characters that have to be clicked, in order.
                    prompt = self.driver.find_element(
                        By.XPATH, '//*[@class="yidun_tips__point"]').text
                    prompt = prompt.replace('"', '').replace(' ', '')
                    size = image.size
                    # Park the pointer on the image; every later move is
                    # relative to this position (the element's centre).
                    ActionChains(self.driver).move_to_element(image).perform()
                    time.sleep(random.uniform(0.5, 2) + random.uniform(0.5, 2))

                    response = requests.get(validation_img, timeout=30)
                    os.makedirs('img', exist_ok=True)
                    fname = 'img/' + self.username + str(int(time.time())) + '.jpg'
                    with open(fname, 'wb') as f:
                        f.write(response.content)
                    print(f'{validation_img}写入成功')

                    # Ask the recognition service for the positions.
                    verify_data = self.verify_code(fname)

                    # Pointer position in image coordinates (origin =
                    # image's top-left corner); updated with what was
                    # really moved.
                    pointer_x = size['width'] / 2
                    pointer_y = size['height'] / 2
                    for char in prompt:
                        print(char)
                        target_x = verify_data[char]['X坐标值'] * scale
                        target_y = verify_data[char]['Y坐标值'] * scale
                        # Whole pixels only, so the bookkeeping below
                        # matches the offsets sent to the browser.
                        dx = round(target_x - pointer_x)
                        dy = round(target_y - pointer_y)
                        self._glide_pointer(dx, dy)
                        # Small random miss before clicking.
                        jitter_x = random.randint(-5, 5)
                        jitter_y = random.randint(-5, 5)
                        ActionChains(self.driver).move_by_offset(
                            jitter_x, jitter_y).click().perform()
                        pointer_x += dx + jitter_x
                        pointer_y += dy + jitter_y
                        time.sleep(random.uniform(0.3, 1.5))

                    time.sleep(3)
                    try:
                        # Still hoverable: captcha still there, retry.
                        ActionChains(self.driver).move_to_element(
                            self.driver.find_element(By.XPATH, image_xpath)).perform()
                    except Exception:
                        # Hover failed: treated as "captcha gone"; log in.
                        time.sleep(1)
                        self.driver.find_element(By.ID, 'dologin').click()
                        break
                except Exception:
                    # `Exception` (not a bare except) keeps Ctrl-C working.
                    traceback.print_exc()
                    time.sleep(1)
        except Exception:
            traceback.print_exc()

    def _glide_pointer(self, offset_x, offset_y):
        """Move the pointer by exactly (offset_x, offset_y) in small random steps.

        Both axes advance together; an axis that has arrived stays put
        while the other one finishes. Offsets are rounded to whole pixels.
        """
        left_x = int(round(offset_x))
        left_y = int(round(offset_y))
        while left_x or left_y:
            step_x = self._approach_step(left_x)
            step_y = self._approach_step(left_y)
            ActionChains(self.driver).move_by_offset(step_x, step_y).perform()
            left_x -= step_x
            left_y -= step_y

    def _approach_step(self, remaining):
        """Return a signed whole-pixel step towards covering ``remaining``.

        Steps are 5-10 px when more than 10 px remain, 2-5 px when more
        than 5 px remain and 1-2 px otherwise. The step never overshoots
        and is never 0 unless ``remaining`` is 0, so callers terminate.
        """
        distance = abs(remaining)
        if distance == 0:
            return 0
        if distance > 10:
            size = random.randint(5, 10)
        elif distance > 5:
            size = random.randint(2, 5)
        else:
            size = random.randint(1, 2)
        size = min(size, distance)
        return size if remaining > 0 else -size

    def _mail_date_key(self, title):
        """Return 'YYYYMMDD' for the first 'Y年M月D日' date in ``title``.

        Month and day are zero-padded; without padding '2022年1月11日'
        would become '2022111', which '%Y%m%d' reads as 1 November.

        Raises:
            IndexError: if ``title`` contains no such date.
        """
        year, month, day = re.findall(r'(\d+)年(\d+)月(\d+)日', title)[0]
        return f'{int(year):04d}{int(month):02d}{int(day):02d}'

    def get_email(self):
        """Apply the configured filters, export every page, then unzip.

        * keyword and dates: search by keyword, tick only mails inside the
          date range on each page, export the ticked mails;
        * keyword only / dates only: filter, then export whole pages;
        * neither: export whole pages of the current folder.
        """
        has_keyword = bool(self.is_search)
        has_dates = bool(self.is_date)
        if has_keyword and has_dates:
            self.serach(self.is_search)
            self.driver.refresh()
            self._export_pages_in_date_range()
        else:
            if has_keyword:
                self.serach(self.is_search)
            elif has_dates:
                self.search_date(f'{self.is_date}:{self.end_data}')
            self._export_all_pages()
        self.decompression()

    def _export_pages_in_date_range(self):
        """Export, page by page, the mails dated within [is_date, end_data].

        Raises:
            ValueError, TypeError: if either configured date is not a
                'YYYYMMDD' string.
        """
        export_xpath = "//span[text()='导出选中邮件']"
        next_xpath = "//span[text()='下一页']"
        range_start = self.time_disposal(self.is_date)
        range_end = self.time_disposal(self.end_data)
        fly_id = 0
        while True:
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, f'//*[@id="fly{fly_id}"]')))
            except Exception:
                # No element for this page index: nothing left to export.
                self.driver.quit()
                break
            # Rows 2..21 of the list; missing rows raise and are logged.
            for num in range(2, 22):
                try:
                    title = self.driver.find_elements(
                        By.XPATH,
                        f'//*[@class="Dg0"]/div[3]/div[{num}]/div/div[3]/div[1]')[0].get_attribute('title')
                    email_date = self.time_disposal(self._mail_date_key(title))
                    if range_start <= email_date <= range_end:
                        checkbox = self.driver.find_element(
                            By.XPATH, f'//*[@class="Dg0"]/div[3]/div[{num}]//label')
                        self.driver.execute_script("arguments[0].click();", checkbox)
                except Exception:
                    traceback.print_exc()
            menu_button = self.driver.find_elements(
                By.XPATH, '//*[@role="toolbar"]/div[4]/div[last()]')[1]
            self.driver.execute_script("arguments[0].click();", menu_button)
            WebDriverWait(self.driver, 10).until(
                EC.visibility_of_any_elements_located((By.XPATH, export_xpath)))
            self.driver.find_element(By.XPATH, export_xpath).click()
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, next_xpath)))
            except Exception:
                # Last page: finish cleanly, as the unfiltered export does.
                traceback.print_exc()
                self.driver.quit()
                break
            next_button = self.driver.find_element(By.XPATH, next_xpath)
            self.driver.execute_script("arguments[0].click();", next_button)
            fly_id += 1
            # NOTE(review): original indentation was lost; refresh/sleep
            # are assumed to belong to the page loop because any later
            # position would run after the browser has been closed.
            self.driver.refresh()
            time.sleep(3)

    def _export_all_pages(self):
        """Click the page's ``fly<N>`` element and export, for every page."""
        export_xpath = "//span[text()='导出选中邮件']"
        next_xpath = "//span[text()='下一页']"
        fly_id = 0
        while True:
            page_xpath = f'//*[@id="fly{fly_id}"]'
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, page_xpath)))
            except Exception:
                self.driver.quit()
                break
            self.driver.find_element(By.XPATH, page_xpath).click()
            self.driver.find_element(By.XPATH, '//*[@role="toolbar"]/div[4]/div[last()]').click()
            WebDriverWait(self.driver, 10).until(
                EC.visibility_of_any_elements_located((By.XPATH, export_xpath)))
            self.driver.find_element(By.XPATH, export_xpath).click()
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.visibility_of_any_elements_located((By.XPATH, next_xpath)))
            except Exception:
                traceback.print_exc()
                self.driver.quit()
                break
            self.driver.find_element(By.XPATH, next_xpath).click()
            fly_id += 1

    def decompression(self):
        """Unpack every zip archive of the download folder into ``decompression/``.

        Only files directly inside the download folder are considered and
        only real zip archives are opened (read-only), so other downloads
        and previously extracted files are left untouched.
        """
        userfile = self.create_file()
        decompression_file = os.path.join(userfile, 'decompression', '')
        os.makedirs(decompression_file, exist_ok=True)
        for name in os.listdir(userfile):
            zip_path = os.path.join(userfile, name)
            if not os.path.isfile(zip_path) or not zipfile.is_zipfile(zip_path):
                continue
            try:
                with zipfile.ZipFile(zip_path) as archive:
                    for member in archive.namelist():
                        try:
                            archive.extract(member, decompression_file)
                        except Exception:
                            # One bad member must not stop the rest.
                            traceback.print_exc()
            except Exception:
                traceback.print_exc()

    def email_main(self):
        """Log in, pass the captcha, open the mailbox tab and export."""
        iframe_xpath = '//*[@id="loginDiv"]/iframe'
        tabs_xpath = '//*[@id="dvMultiTabWrapper"]'
        self.driver.get('https://mail.163.com/')
        WebDriverWait(self.driver, 10).until(
            EC.visibility_of_any_elements_located((By.XPATH, iframe_xpath)))
        time.sleep(random.uniform(0.5, 2))
        # The login form lives inside an iframe.
        self.driver.switch_to.frame(self.driver.find_element(By.XPATH, iframe_xpath))
        WebDriverWait(self.driver, 10).until(
            EC.visibility_of_any_elements_located((By.XPATH, '//*[@name="email"]')))
        time.sleep(random.uniform(0.5, 2))
        self.driver.find_element(By.XPATH, '//*[@name="email"]').send_keys(self.username)
        time.sleep(random.uniform(0.5, 2))
        self.driver.find_element(By.XPATH, '//*[@name="password"]').send_keys(self.password)
        time.sleep(random.uniform(0.5, 2))
        self.driver.find_element(By.ID, 'dologin').click()
        self.verification()
        WebDriverWait(self.driver, 10).until(
            EC.visibility_of_any_elements_located((By.XPATH, tabs_xpath)))
        self.driver.find_element(By.XPATH, '//*[@id="dvMultiTabWrapper"]//ul/li[4]').click()
        self.get_email()

    def serach(self, key):
        """Type ``key`` into the mailbox search box and submit it."""
        box_xpath = '//*[@role="search"]//input'
        time.sleep(3)
        WebDriverWait(self.driver, 10).until(
            EC.visibility_of_any_elements_located((By.XPATH, box_xpath)))
        self.driver.find_element(By.XPATH, box_xpath).send_keys(key)
        time.sleep(5)
        self.driver.find_element(By.XPATH, box_xpath).send_keys(Keys.ENTER)
        time.sleep(5)

    def search_date(self, date):
        """Open the inbox list filtered by sent date.

        Args:
            date: Filter value placed into the list URL's ``sentDate``
                field; ``get_email`` passes 'YYYYMMDD:YYYYMMDD'.

        Raises:
            ValueError: if the current URL carries no ``sid`` parameter.
        """
        now_url = self.driver.current_url
        print(now_url)
        # Also matches when sid is the last parameter (no trailing '&').
        sid_match = re.search(r'sid=([^&#]+)', now_url)
        if sid_match is None:
            raise ValueError(f'no sid parameter in current URL: {now_url}')
        user_sid = sid_match.group(1)
        next_url = 'https://mail.163.com/js6/main.jsp?sid=%s&df=mail163_letter#module=mbox.ListModule|{"order":"date","desc":true,"fids":[1],"filter":{"sentDate":"%s"}}' % (user_sid, date)
        self.driver.get(next_url)
# Run the export only when executed as a script, so importing this module
# does not start a browser and log in.
if __name__ == "__main__":
    # Placeholder credentials; replace with a real account before running.
    username = 'xx'
    password = 'xx'
    # Export mails matching the keyword and dated within the given range.
    get_email = email_163(username, password, is_search='在', start_date='20220101', end_date='20220614')
    get_email.email_main()
总结
算坐标的时候脑子都快爆炸了
模拟人类鼠标移动那有点瑕疵,好在通过率极高
备注
本脚本只限个人学习用,如用作他用产生任何责任与作者无关