打开浏览器,登录 “龙de船人”
需要先安装 selenium(pip install selenium),并下载与浏览器版本对应的 chromedriver,放到 python 安装目录下
1.1在浏览器输入chrome://version,即可查询版本号
![](https://img-blog.csdnimg.cn/img_convert/28644fff2a36bd27a71af3addb9a19f5.png)
1.2下载与 chrome 浏览器版本对应的 chromedriver 驱动文件至python安装目录下
下载地址:http://chromedriver.storage.googleapis.com/index.html
选择正确的版本号
![](https://img-blog.csdnimg.cn/img_convert/648c2ca30e5de50f2f05c6cb15ee4257.png)
windows64位选win32即可
![](https://img-blog.csdnimg.cn/img_convert/c88a2b944a33e57172d5179716270f15.png)
解压放到python安装目录
![](https://img-blog.csdnimg.cn/img_convert/9d0b813e407fda355924d59a0c7c3b03.png)
打开页面的代码如下(以下为 CrackSlider 类的 __init__ 方法,另需 import time):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def __init__(self):
    """Start Chrome and open the imarine.cn login page.

    Creates ``self.opts``, ``self.driver`` and ``self.login_url``. A failure
    while loading the page is reported and otherwise tolerated, so the
    caller still gets an object whose ``driver`` can be inspected.
    """
    super(CrackSlider, self).__init__()
    self.opts = Options()
    self.opts.add_argument('--no-sandbox')  # run Chrome without its sandbox
    self.driver = webdriver.Chrome(options=self.opts)
    self.login_url = "https://www.imarine.cn/member.php?mod=logging&action=login"
    try:
        # The browser navigates by itself, so a plain GET is all that is
        # needed here (no GET/POST distinction).
        self.driver.get(self.login_url)
    except Exception as e:
        # Best effort: keep going, but report what went wrong instead of
        # printing an unrelated message and dropping the exception.
        print("打开登录页面失败:", repr(e))
    # Give the page two seconds to finish rendering.
    time.sleep(2)
自动登录(难点:滑块验证)
from io import BytesIO
import cv2
import numpy as np
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
def __init__(self):
    """Fill in the login form and submit it.

    Opening the browser is omitted in this snippet. After submitting,
    ``self.wait`` is set to a 10-second ``WebDriverWait`` on the driver.
    """
    super(CrackSlider, self).__init__()
    # Opening the browser is omitted here.
    # Wait two seconds before touching the form.
    time.sleep(2)

    form_xpath = "/html/body/div[1]/main/div/div[1]/div/div[2]/div[1]/div/form"
    # (input XPath, text to type) for the account and the password fields.
    credentials = (
        (form_xpath + "/div[1]/div[2]/div[1]/input", "账号"),
        (form_xpath + "/div[2]/div[2]/div[1]/input", "密码"),
    )
    for input_xpath, text in credentials:
        field = self.driver.find_element(by=By.XPATH, value=input_xpath)
        field.send_keys(text)
        time.sleep(1)

    submit_button = self.driver.find_element(by=By.XPATH, value=form_xpath + "/button")
    submit_button.click()
    time.sleep(1)
    self.wait = WebDriverWait(self.driver, 10)
def get_pic(self):
    """Download the two captcha images to ``target.jpg`` / ``template.png``.

    Switches the driver into the ``tcaptcha_iframe`` frame (and leaves it
    there), reads the ``src`` of both ``<img>`` elements, downloads them and
    saves them in the current working directory.

    Raises:
        requests.RequestException: if a download times out or the server
            answers with an HTTP error status.
    """
    time.sleep(5)
    self.driver.switch_to.frame('tcaptcha_iframe')
    # (XPath of the <img>, file the image is saved to)
    downloads = (
        ("/html/body/div/div[3]/div[2]/div[1]/div[2]/img", 'target.jpg'),
        ("/html/body/div/div[3]/div[2]/div[1]/div[3]/img", 'template.png'),
    )
    links = [
        self.driver.find_element(by=By.XPATH, value=xpath).get_attribute('src')
        for xpath, _ in downloads
    ]
    for link, (_, path) in zip(links, downloads):
        # Without a timeout a stalled server would block the job forever.
        response = requests.get(link, timeout=10)
        # Fail on HTTP errors instead of handing an error page to PIL.
        response.raise_for_status()
        Image.open(BytesIO(response.content)).save(path)
def crack_slider(self, distance):
    """Drag the captcha slider ``distance`` pixels to the right.

    Each step is performed as its own action chain: press and hold the
    slider, move horizontally, pause two seconds, release, and finally
    click the slider once more.
    """
    browser = self.driver
    thumb = browser.find_element(by=By.ID, value='tcaptcha_drag_thumb')

    press = ActionChains(browser).click_and_hold(thumb)
    press.perform()

    drag = ActionChains(browser).move_by_offset(distance, 0)
    drag.perform()

    time.sleep(2)

    let_go = ActionChains(browser).release()
    let_go.perform()

    final_click = ActionChains(browser).click(thumb)
    final_click.perform()
def add_alpha_channel(img):
    """Return a copy of a 3-channel image with an opaque alpha channel added.

    Args:
        img: array of shape (height, width, 3). The colour order is left
            untouched (images read with ``cv2.imread`` are BGR, not RGB).

    Returns:
        A new array of shape (height, width, 4) with the same dtype; the
        fourth channel is filled with 255.

    Raises:
        ValueError: if ``img`` does not have exactly three channels.
    """
    if img.ndim != 3 or img.shape[2] != 3:
        raise ValueError(
            "expected an image of shape (H, W, 3), got shape {}".format(img.shape)
        )
    alpha_channel = np.full(img.shape[:2] + (1,), 255, dtype=img.dtype)
    return np.concatenate((img, alpha_channel), axis=2)
def handel_img(img):
    """Reduce a BGRA image to its Canny edge map.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    (sigma 1) and passed through Canny edge detection with both thresholds
    set to 60.

    Args:
        img: 4-channel image as loaded by ``cv2.imread`` (BGRA order).

    Returns:
        The single-channel edge image produced by ``cv2.Canny``.
    """
    # cv2.imread yields blue-first channels, so the BGRA conversion code is
    # the one that applies the grayscale weights to the right channels.
    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 1)
    edges = cv2.Canny(blurred, 60, 60)
    return edges
def match(img_jpg_path, img_png_path):
    """Locate the puzzle piece inside the captcha background.

    Both images are reduced to edge maps and compared with normalised
    cross-correlation template matching.

    Args:
        img_jpg_path: path of the background image.
        img_png_path: path of the puzzle-piece (template) image.

    Returns:
        The x coordinate (in background-image pixels) of the best match.

    Raises:
        FileNotFoundError: if either image cannot be read.
    """
    img_jpg = cv2.imread(img_jpg_path, cv2.IMREAD_UNCHANGED)
    img_png = cv2.imread(img_png_path, cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None rather than raising.
    for path, image in ((img_jpg_path, img_jpg), (img_png_path, img_png)):
        if image is None:
            raise FileNotFoundError("cannot read image: {}".format(path))

    # Give a 3-channel background an alpha channel so both images have four.
    if img_jpg.shape[2] == 3:
        img_jpg = add_alpha_channel(img_jpg)

    background_edges = handel_img(img_jpg)
    piece_edges = handel_img(img_png)

    # Method 3 of the original code is TM_CCORR_NORMED; spelled out by name.
    scores = cv2.matchTemplate(background_edges, piece_edges, cv2.TM_CCORR_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(scores)
    return max_loc[0]  # x of the best-scoring position
def job():
    """Log in, fetch the captcha images, and drag the slider into place."""
    # 1. Start chromedriver and download the two captcha images.
    cracker = CrackSlider()
    cracker.get_pic()

    # 2. Compare the images to find the horizontal gap position.
    #    Adjust these paths if the images are stored elsewhere.
    background_path = 'target.jpg'
    piece_path = 'template.png'
    gap_x = match(background_path, piece_path)
    # Scale and offset chosen by the original author by manual adjustment.
    drag_distance = gap_x / 680 * 340 - 25

    # 3. Drag the slider.
    cracker.crack_slider(drag_distance)
船舶订单查询
根据元素的完整 XPath(full XPath)定位元素,然后进行点击或文本输入
def query(self):
    """Open the order query page, fill in the search form and submit it.

    Elements are located by absolute XPath and clicked in a fixed order:
    start date (year, month, day), end date, then the country field is
    filled with "中国" and the search button is pressed.

    Returns:
        Always 0.
    """
    def locate(xpath):
        # Shorthand for an absolute-XPath lookup on the driver.
        return self.driver.find_element(by=By.XPATH, value=xpath)

    time.sleep(10)
    print("正在点击查询按钮......")
    time.sleep(5)
    # Open the query page.
    query_link = locate("/html/body/div[6]/div/div[1]/div[2]/a[5]")
    time.sleep(3)
    query_link.click()
    time.sleep(5)
    print("正在输入查询条件......")

    click_sequence = (
        # start-date input
        "/html/body/div[8]/form/div/div[2]/dl/dd[1]/input[1]",
        # year picker, then the year entry
        "/html/body/div[1]/div/div[1]/div/table/tbody/tr[1]/td[2]/a[1]",
        "/html/body/div[1]/div/div[2]/div[7]/a[9]/span",
        # month picker, then the month entry
        "/html/body/div[1]/div/div[1]/div/table/tbody/tr[1]/td[2]/a[2]",
        "/html/body/div[1]/div/div[3]/a[1]/span",
        # day cell
        "/html/body/div[1]/div/div[1]/div/table/tbody/tr[3]/td[7]",
        # end-date input
        "/html/body/div[8]/form/div/div[2]/dl/dd[1]/input[2]",
        # current date
        "/html/body/div[1]/div/div[1]/div/table/tbody/tr[5]/td[4]/a",
    )
    for xpath in click_sequence:
        locate(xpath).click()

    # Country: China.
    country_input = locate("/html/body/div[8]/form/div/div[2]/dl/dd[5]/input")
    country_input.send_keys("中国")
    # Run the search.
    search_button = locate("/html/body/div[8]/form/div/div[2]/dl/dd[7]/button/span")
    search_button.click()
    time.sleep(2)
    return 0
获取表单内容
def get_and_update(self):
    """Collect the order rows shown on the current page into ``self.arr_res``.

    The text of the result container is split into lines; the first three
    lines are skipped (presumably header lines — confirm against the page).
    Every remaining line is split on single spaces and fields 0-4, 6 and 7
    are stored as a tuple.

    Returns:
        False as soon as a row whose first field sorts before '2022' is
        met (only data from 2022 onwards is wanted), True otherwise.
    """
    table = self.driver.find_element(by=By.XPATH, value="/html/body/div[8]/div[1]")
    table_text = table.text
    print(table_text)
    for line in table_text.split("\n")[3:]:
        fields = line.split(" ")
        if len(fields) < 8:
            # Blank or incomplete line: skip it instead of raising IndexError.
            continue
        if fields[0] < '2022':  # string comparison on the first field
            return False
        # Every accepted row ends up in self.arr_res.
        self.arr_res.append(
            (fields[0], fields[1], fields[2], fields[3], fields[4], fields[6], fields[7])
        )
    return True
数据处理
import cx_Oracle
from sys import modules
# 连接Oracle数据库
class oracleOperation():
    """Small helper around a cx_Oracle connection for the ``Longde`` table."""

    def openOracleConn(self):
        """Open and return a new cx_Oracle connection.

        The connect string is a placeholder in the form
        user/password@host:port/sid and must be replaced with real values,
        e.g. 'c##sxx/c##sxx@localhost:1521/orcltest'.
        """
        highway = cx_Oracle.connect('用户名/密码@host:端口/sid')
        return highway

    def factorSelect(self, connection, param):
        """Return True if a row matching every field of ``param`` exists.

        Args:
            connection: an open database connection.
            param: bind values for create_date, dockyard, ship_type,
                quantity, specification, region and shipowner.

        The connection is left open; the caller closes it once all data has
        been processed.
        """
        sql = 'select * from Longde where create_date =:create_date and dockyard =:dockyard ' \
              'and ship_type =:ship_type and quantity =:quantity and specification =:specification ' \
              'and region =:region and shipowner =:shipowner'
        cursor = connection.cursor()
        try:
            cursor.execute(sql, param)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
        return len(rows) != 0

    def insert(self, connection, insertParam=None):
        """Insert the given rows into ``Longde`` and commit.

        Args:
            connection: an open database connection.
            insertParam: list of rows to insert; defaults to no rows. The
                list passed in is not modified.

        With no rows a message is printed and nothing is inserted.
        """
        sql = "insert into Longde (create_date,dockyard,ship_type,quantity,specification,region,shipowner) " \
              "values (:create_date,:dockyard,:ship_type,:quantity,:specification,:region,:shipowner)"
        source_rows = [] if insertParam is None else insertParam
        # Entries equal to '-' become None, on a copy rather than in place.
        # NOTE(review): the caller visible in this file passes a list of
        # tuples, so this comparison looks at whole rows and never at single
        # fields. Converting '-' fields to NULL would also need a NULL-aware
        # comparison in factorSelect, so the behaviour is left unchanged.
        rows = [None if row == '-' else row for row in source_rows]
        cursor = connection.cursor()
        try:
            if len(rows) == 0:
                print("插入的数据行的参数不能为空!")
            else:
                cursor.prepare(sql)
                cursor.executemany(None, rows)
            connection.commit()
        finally:
            cursor.close()
if __name__ == '__main__':
    # Smoke test: run an unconditional query against the Longde table.
    # The class has no ``select`` method, so the query is issued directly.
    db = oracleOperation()
    connection = db.openOracleConn()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute('select * from Longde')
            print(cursor.fetchall())
        finally:
            cursor.close()
    finally:
        connection.close()
定时任务
from apscheduler.schedulers.blocking import BlockingScheduler
def job():
    """Placeholder for the work that the scheduler runs repeatedly."""
    return None
if __name__ == "__main__":
    # Run ``job`` (the function defined above) every 180 seconds; start()
    # blocks the current thread.
    interval_seconds = 180
    scheduler = BlockingScheduler()
    scheduler.add_job(job, 'interval', seconds=interval_seconds)
    scheduler.start()
代码(无数据库操作部分)
# coding=utf-8
import datetime
import re
import requests
import time
from io import BytesIO
import cv2
import numpy as np
from PIL import Image
from apscheduler.schedulers.blocking import BlockingScheduler
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
from helloworld.Longde import db_oracle_Longde
class CrackSlider:
    """Log in to imarine.cn through a slider captcha and scrape ship orders.

    The captcha images are downloaded, the gap position is computed outside
    this class, and the slider is dragged by that distance. Afterwards the
    new-building order list is queried page by page and stored in Oracle.
    """

    # Absolute XPath of the login form.
    _LOGIN_FORM_XPATH = "/html/body/div[1]/main/div/div[1]/div/div[2]/div[1]/div/form"
    # Absolute XPath of the element showing the total page count.
    _PAGE_COUNT_XPATH = "/html/body/div[8]/div[2]/div/label/span"
    # Column names matching the seven fields kept from every order row.
    _ORDER_COLUMNS = ('create_date', 'dockyard', 'ship_type', 'quantity',
                      'specification', 'region', 'shipowner')
    # Seconds to wait for a captcha image download.
    _HTTP_TIMEOUT = 10

    def __init__(self):
        """Start Chrome, open the login page and submit the credentials.

        If the login steps fail, the browser is closed before the exception
        propagates so that no Chrome process is left behind.
        """
        super().__init__()
        self.opts = Options()
        self.opts.add_argument('--no-sandbox')  # Bypass OS security model
        self.driver = webdriver.Chrome(options=self.opts)
        self.login_url = "https://www.imarine.cn/member.php?mod=logging&action=login"
        # New-building order query page.
        self.Longde_url = "https://www.imarine.cn/order/?wtime=&shipyard=&rocker=&power=&countryarea=&shipowner="
        self.arr_res = []  # every scraped row, as tuples
        try:
            self._login()
        except Exception:
            self.driver.quit()
            raise
        self.wait = WebDriverWait(self.driver, 10)

    def _login(self):
        """Open the login page, type account and password, press submit."""
        try:
            self.driver.get(self.login_url)
        except Exception as e:
            # Best effort: report the failure instead of hiding it.
            print("打开登录页面失败:", repr(e))
        time.sleep(2)
        form = self._LOGIN_FORM_XPATH
        credentials = (
            (form + "/div[1]/div[2]/div[1]/input", "账号"),
            (form + "/div[2]/div[2]/div[1]/input", "密码"),
        )
        for xpath, text in credentials:
            self.driver.find_element(by=By.XPATH, value=xpath).send_keys(text)
            time.sleep(1)
        self.driver.find_element(by=By.XPATH, value=form + "/button").click()
        time.sleep(1)

    def get_pic(self):
        """Download the two captcha images to target.jpg / template.png.

        Switches the driver into the ``tcaptcha_iframe`` frame and leaves it
        there.

        Raises:
            requests.RequestException: on a download timeout or an HTTP
                error status.
        """
        time.sleep(5)
        self.driver.switch_to.frame('tcaptcha_iframe')
        downloads = (
            ("/html/body/div/div[3]/div[2]/div[1]/div[2]/img", 'target.jpg'),
            ("/html/body/div/div[3]/div[2]/div[1]/div[3]/img", 'template.png'),
        )
        links = [
            self.driver.find_element(by=By.XPATH, value=xpath).get_attribute('src')
            for xpath, _ in downloads
        ]
        for link, (_, path) in zip(links, downloads):
            # A timeout keeps a stalled download from blocking the job.
            response = requests.get(link, timeout=self._HTTP_TIMEOUT)
            response.raise_for_status()
            Image.open(BytesIO(response.content)).save(path)

    def crack_slider(self, distance):
        """Drag the captcha slider ``distance`` pixels to the right.

        Press and hold, move, wait two seconds, release, then click the
        slider once more; every step is its own action chain.
        """
        slider = self.driver.find_element(by=By.ID, value='tcaptcha_drag_thumb')
        ActionChains(self.driver).click_and_hold(slider).perform()
        ActionChains(self.driver).move_by_offset(xoffset=distance, yoffset=0).perform()
        time.sleep(2)
        ActionChains(self.driver).release().perform()
        ActionChains(self.driver).click(slider).perform()

    def query(self):
        """Open the order query page, fill in the search form and submit it.

        Returns:
            Always 0.
        """
        def locate(xpath):
            return self.driver.find_element(by=By.XPATH, value=xpath)

        time.sleep(10)
        print("正在点击查询按钮......")
        time.sleep(5)
        query_link = locate("/html/body/div[6]/div/div[1]/div[2]/a[5]")
        time.sleep(3)
        query_link.click()
        time.sleep(5)
        print("正在输入查询条件......")
        click_sequence = (
            # start-date input
            "/html/body/div[8]/form/div/div[2]/dl/dd[1]/input[1]",
            # year picker, then the year entry
            "/html/body/div[1]/div/div[1]/div/table/tbody/tr[1]/td[2]/a[1]",
            "/html/body/div[1]/div/div[2]/div[7]/a[9]/span",
            # month picker, then the month entry
            "/html/body/div[1]/div/div[1]/div/table/tbody/tr[1]/td[2]/a[2]",
            "/html/body/div[1]/div/div[3]/a[1]/span",
            # day cell
            "/html/body/div[1]/div/div[1]/div/table/tbody/tr[3]/td[7]",
            # end-date input
            "/html/body/div[8]/form/div/div[2]/dl/dd[1]/input[2]",
            # current date
            "/html/body/div[1]/div/div[1]/div/table/tbody/tr[5]/td[4]/a",
        )
        for xpath in click_sequence:
            locate(xpath).click()
        # Country: China.
        locate("/html/body/div[8]/form/div/div[2]/dl/dd[5]/input").send_keys("中国")
        # Run the search.
        locate("/html/body/div[8]/form/div/div[2]/dl/dd[7]/button/span").click()
        time.sleep(2)
        return 0

    def get_order(self):
        """Scrape every result page and store new rows in the database.

        The browser and the database connection are always closed when this
        method returns or raises, including the early stop on old data.

        Returns:
            False when scraping stopped early because a row older than 2022
            was reached, None when all pages were processed.
        """
        try:
            self.db = db_oracle_Longde.oracleOperation()
            self.connection = self.db.openOracleConn()
            try:
                return self._scrape_all_pages()
            finally:
                self.connection.close()
        finally:
            self.driver.quit()

    def _read_page_count(self):
        """Return the total page count shown on the page, as a string."""
        text = self.driver.find_element(by=By.XPATH, value=self._PAGE_COUNT_XPATH).text
        return text.strip("/ ").strip(" 页")

    def _next_page(self):
        """Click the 'next page' control and wait for the page to change."""
        self.driver.find_element(by=By.CLASS_NAME, value="nxt").click()
        time.sleep(2)

    def _scrape_all_pages(self):
        """Walk through the result pages; see ``get_order`` for the result."""
        pageNum = self._read_page_count()
        print("第一次--> 一共有" + pageNum + "页")
        if self.get_and_update() is False:
            return False
        self._next_page()
        # The total shown on first entry is wrong; it is correct only after
        # moving to the next page.
        pageNum = self._read_page_count()
        print("点击后--> 一共有" + pageNum + "页")
        # Page 1 is done and the browser is on page 2: handle pages
        # 2 .. last-1 here, the last page below.
        for page_no in range(2, int(pageNum)):
            print('现在是第', str(page_no), '页')
            if self.get_and_update() is False:
                return False
            self._next_page()
        self.get_and_update()
        print("所有数据:")
        print(self.arr_res)
        return None

    @staticmethod
    def _parse_order_rows(table_text):
        """Split the table text into row tuples.

        The first three lines are skipped and lines with fewer than eight
        space-separated fields are ignored. Parsing stops at the first row
        whose first field sorts before '2022'.

        Returns:
            (rows, reached_cutoff): the 7-field tuples collected so far and
            whether an old row stopped the parsing.
        """
        rows = []
        for line in table_text.split("\n")[3:]:
            fields = line.split(" ")
            if len(fields) < 8:
                continue
            if fields[0] < '2022':  # only data from 2022 onwards is wanted
                return rows, True
            rows.append((fields[0], fields[1], fields[2], fields[3],
                         fields[4], fields[6], fields[7]))
        return rows, False

    def get_and_update(self):
        """Store the rows of the current page in arr_res and the database.

        A row is inserted only when ``factorSelect`` finds no identical row.

        Returns:
            False if a row older than 2022 was reached, True otherwise.
        """
        table = self.driver.find_element(by=By.XPATH, value="/html/body/div[8]/div[1]")
        table_text = table.text
        print(table_text)
        rows, reached_cutoff = self._parse_order_rows(table_text)
        for row in rows:
            self.arr_res.append(row)
            criteria = dict(zip(self._ORDER_COLUMNS, row))
            if self.db.factorSelect(self.connection, criteria) is False:
                self.db.insert(self.connection, [row])
        return not reached_cutoff
def add_alpha_channel(img):
    """Return a copy of a 3-channel image with an opaque alpha channel added.

    Args:
        img: array of shape (height, width, 3). The colour order is left
            untouched (images read with ``cv2.imread`` are BGR, not RGB).

    Returns:
        A new array of shape (height, width, 4) with the same dtype; the
        fourth channel is filled with 255.

    Raises:
        ValueError: if ``img`` does not have exactly three channels.
    """
    if img.ndim != 3 or img.shape[2] != 3:
        raise ValueError(
            "expected an image of shape (H, W, 3), got shape {}".format(img.shape)
        )
    alpha_channel = np.full(img.shape[:2] + (1,), 255, dtype=img.dtype)
    return np.concatenate((img, alpha_channel), axis=2)
def handel_img(img):
    """Reduce a BGRA image to its Canny edge map.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    (sigma 1) and passed through Canny edge detection with both thresholds
    set to 60.

    Args:
        img: 4-channel image as loaded by ``cv2.imread`` (BGRA order).

    Returns:
        The single-channel edge image produced by ``cv2.Canny``.
    """
    # cv2.imread yields blue-first channels, so the BGRA conversion code is
    # the one that applies the grayscale weights to the right channels.
    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 1)
    edges = cv2.Canny(blurred, 60, 60)
    return edges
def match(img_jpg_path, img_png_path):
    """Locate the puzzle piece inside the captcha background.

    Both images are reduced to edge maps and compared with normalised
    cross-correlation template matching.

    Args:
        img_jpg_path: path of the background image.
        img_png_path: path of the puzzle-piece (template) image.

    Returns:
        The x coordinate (in background-image pixels) of the best match.

    Raises:
        FileNotFoundError: if either image cannot be read.
    """
    img_jpg = cv2.imread(img_jpg_path, cv2.IMREAD_UNCHANGED)
    img_png = cv2.imread(img_png_path, cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None rather than raising.
    for path, image in ((img_jpg_path, img_jpg), (img_png_path, img_png)):
        if image is None:
            raise FileNotFoundError("cannot read image: {}".format(path))

    # Give a 3-channel background an alpha channel so both images have four.
    if img_jpg.shape[2] == 3:
        img_jpg = add_alpha_channel(img_jpg)

    background_edges = handel_img(img_jpg)
    piece_edges = handel_img(img_png)

    # Method 3 of the original code is TM_CCORR_NORMED; spelled out by name.
    scores = cv2.matchTemplate(background_edges, piece_edges, cv2.TM_CCORR_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(scores)
    return max_loc[0]  # x of the best-scoring position
def job():
    """Run one complete scrape: log in, solve the captcha, query, store.

    The browser is closed when the run ends, whether it succeeded or a step
    raised, so that repeated scheduled runs do not pile up Chrome processes.
    """
    # 1. Start chromedriver and log in.
    cs = CrackSlider()
    try:
        # Download the two captcha images.
        cs.get_pic()
        # 2. Compare the images to find the horizontal gap position.
        #    Adjust these paths if the images are stored elsewhere.
        img_jpg_path = 'target.jpg'
        img_png_path = 'template.png'
        distance = match(img_jpg_path, img_png_path)
        # Scale and offset as chosen by the original author.
        distance = distance / 680 * 340 - 25
        # 3. Drag the slider.
        cs.crack_slider(distance)
        # 4. Query the orders and store them.
        cs.query()
        cs.get_order()
    finally:
        # NOTE(review): get_order() also quits the driver on its normal
        # path; a second quit() is expected to be tolerated by the Chrome
        # driver — confirm for the Selenium version in use.
        cs.driver.quit()
if __name__ == "__main__":
    started_at = datetime.datetime.now()
    print(started_at.strftime("%Y-%m-%d %H:%M:%S"))
    # BlockingScheduler runs ``job`` every 180 seconds (3 minutes); start()
    # blocks the current thread. Call job() directly for a single run.
    interval_seconds = 180
    scheduler = BlockingScheduler()
    scheduler.add_job(job, 'interval', seconds=interval_seconds)
    scheduler.start()