# encoding=utf-8
import sys
sys.path.append('/home/henson/Documents/coding/bill/captcha_recognition')
from captcha_recognition.recognition_img import distinguish_captcha
from PIL import Image,ImageDraw,ImageChops
from selenium.common.exceptions import NoSuchElementException
from PIL import Image
import selenium
import sys
import time
import re
import csv
import pytesser
# 打开验证码界面
import os
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
# Browser-like request headers. NOTE(review): not referenced anywhere else in
# the visible part of this file -- confirm whether it is still needed.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
# Absolute path of the ChromeDriver executable on the author's machine.
chromedriver = "/home/henson/Documents/pycharm/webdriver/chromedriver"
# Publish the driver path through the process environment as well.
os.environ["webdriver.chrome.driver"] = chromedriver
# Module-level browser session; created as soon as this module is executed and
# shared by get_img() below.
driver = webdriver.Chrome(chromedriver)
# Scroll the current document to the bottom. No driver.get() call precedes this
# line in the visible source, so it runs against whatever page a fresh session
# starts on.
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Disabled snippet kept as a bare string literal (it is never executed): it
# would load the captcha page and, for i in 1..999, screenshot the window, crop
# the 'imgCode' element, save the crop as a numbered PNG and click the image to
# switch to a new captcha.
"""
driver.get('http://www******')
#driver.get(url)
for i in range(1,1000):
driver.get_screenshot_as_file('screenshot.png')
# 获取指定元素位置
element = driver.find_element_by_id('imgCode')
left = int(element.location['x'])
top = int(element.location['y'])
right = int(element.location['x'] + element.size['width'])
bottom = int(element.location['y'] + element.size['height'])
# 通过Image处理图像
im = Image.open('screenshot.png')
im = im.crop((left, top, right, bottom))
im.save('/home/henson/Documents/coding/bill/code/code+('+str(i)+').png')
driver.find_element_by_xpath('//*[@id="imgCode"]').click() # 切换验证码
"""
def get_img(url):
    """Load *url* and keep submitting recognised captcha codes until the page changes.

    Each attempt screenshots the browser window, crops the screenshot to the
    bounding box of the ``imgCode`` element, saves the crop as ``code.png``,
    passes that image to ``distinguish_captcha`` and types the result into the
    ``txtCode`` field before clicking ``btnOk``.  The loop ends as soon as
    ``driver.current_url`` differs from the URL observed right after loading.

    Args:
        url: Address of the page that shows the captcha form.

    Returns:
        None.  Any failure is reported on stdout instead of being raised.
    """
    try:
        driver.get(url)
        start_url = driver.current_url

        # Always try at least once; leave the loop when the browser has
        # navigated away from the page that was loaded above.
        while True:
            # Take a screenshot of the browser window.
            driver.get_screenshot_as_file('screenshot.png')

            # Locate the captcha element and derive its bounding box.
            # NOTE(review): assumes screenshot pixels line up 1:1 with the
            # element's reported location/size (no scaling, no scroll offset).
            captcha = driver.find_element_by_id('imgCode')
            left = int(captcha.location['x'])
            top = int(captcha.location['y'])
            right = int(captcha.location['x'] + captcha.size['width'])
            bottom = int(captcha.location['y'] + captcha.size['height'])

            # Cut the captcha out of the screenshot with PIL.
            screenshot = Image.open('screenshot.png')
            screenshot.crop((left, top, right, bottom)).save('code.png')

            code_field = driver.find_element_by_name("txtCode")
            code = distinguish_captcha(Image.open('code.png'))  # recognised text
            print(code)
            code_field.send_keys(code)
            driver.find_element_by_xpath('//*[@id="btnOk"]').click()

            try:
                alert = driver.switch_to_alert()
                time.sleep(2)
                print(alert.text)   # show the alert dialog's message
                alert.accept()      # dismiss the dialog
                driver.find_element_by_name("txtCode").clear()
            except Exception:
                # Deliberate best effort: if the alert cannot be read or
                # handled (presumably because none was shown), fall through
                # and let the URL check below decide whether to retry.
                pass

            if driver.current_url != start_url:
                break
    except Exception as exc:
        # Report the concrete error; printing the bare ``Exception`` class
        # would hide what actually went wrong.
        print(repr(exc))
1.先获取验证码的图片:此处用的是 Selenium WebDriver 的截屏功能,再按验证码元素的位置对截图进行裁剪,得到验证码图片
2.导入了人家的captcha_recognition,用来做验证码的识别
captcha_recognition包里,会对验证码进行二值化、切割等处理。不过最好根据自己的验证码收集样本,切割后手动分类,以提高识别率。
=======================>>>>>>
实践感觉效果算是不错的了,可以一用