webdriver爬虫启动chrome浏览器报错
报错:unknown error: chrome failed to start
WebDriverException: Message: unknown error: Chrome failed to start: crashed
WebDriverException: Message: unknown error: Chrome failed to start:
如果其他方法都不行(前提是你已经试过别人给出的解决方案),可以试试下面的办法:
解决方法:你是否给 Chrome 配置过环境变量?如果配置过,请重装 Chrome,然后把环境变量改为 C:\Users\admin\AppData\Local\Google\Chrome
顺带传一个自己写的小玩意:
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys #引入键盘
import pytesseract
from PIL import Image, ImageEnhance
import time
from aip import AipOcr
import os
# Every character accepted in a captcha guess: lowercase letters, uppercase
# letters, then the digits 1-9 followed by 0 (order kept as a list).
all_num = list(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '1234567890'
)
#///验证码识别,调用百度api,账号是别人的,慎重对待
def baidu_image_to_word(image_path):
    """Run Baidu's high-accuracy general OCR on a local image file.

    Args:
        image_path: Path of the image to recognise; it is read in binary mode.

    Returns:
        A ``(guess, probability, res)`` tuple where ``guess`` is the text of
        the first recognised line, ``probability`` is that line's average
        confidence, and ``res`` is the raw API response. When the response
        does not contain a usable first line, ``guess`` is ``''`` and
        ``probability`` is ``0``.
    """
    # The fallback credentials are the ones this script has always shipped
    # with (they belong to a third party -- use with care). Set the
    # environment variables below to use your own account instead.
    app_id = os.environ.get('BAIDU_OCR_APP_ID', '14364432')
    api_key = os.environ.get('BAIDU_OCR_API_KEY', 'jgopMYaecGeGgaBr2EYWKNDZ')
    secret_key = os.environ.get('BAIDU_OCR_SECRET_KEY', 'TnpKrHyyc3IgrGw2L5ZzKRiY9F2seCSk')
    client = AipOcr(app_id, api_key, secret_key)

    with open(image_path, 'rb') as fp:
        image = fp.read()

    options = {
        "language_type": "ENG",
        "detect_direction": "false",  # do not detect the image orientation
        "probability": "true",  # return the confidence of every line
    }
    # Only the high-accuracy endpoint is called. An earlier revision also
    # issued a basicGeneral() request whose result was thrown away, which
    # doubled the API usage for no benefit.
    res = client.basicAccurate(image, options)

    try:
        first_line = res['words_result'][0]
        guess = first_line['words']
        probability = first_line['probability']['average']
    except (KeyError, IndexError, TypeError):
        # Error payloads carry no 'words_result', and an empty result list
        # has no first line; both count as a failed recognition.
        print("识别失败,将置信度归为0,文字为空")
        guess = ''
        probability = 0
    return guess, probability, res
#
def get_code(file, result_path="D://AAAAtest/result.png"):
    """Recognise the captcha stored in an image file.

    The image is converted to greyscale, contrast-enhanced and written to
    ``result_path``; that enhanced file is then sent to
    ``baidu_image_to_word`` until the answer is confident enough or the
    attempt budget is used up.

    Args:
        file: Path of the cropped captcha image.
        result_path: Where the enhanced image is written before it is
            submitted for OCR. Defaults to the location the script has
            always used.

    Returns:
        The last recognised text with all spaces removed. This is returned
        even when no attempt reached the confidence/length thresholds.
    """
    min_probability = 0.92
    max_attempts = 30

    # The input file never changes between attempts, so enhance it once
    # instead of redoing identical work on every iteration.
    with Image.open(file) as source:
        grey = source.convert('L')  # 'L' is PIL's greyscale mode
        enhanced = ImageEnhance.Contrast(grey).enhance(3.0)
    enhanced.save(result_path)

    guess = ''
    probability = 0
    words_num = 0
    count = 0
    # Accept only an answer with confidence >= 0.92 and more than 4 characters.
    while (probability < min_probability or words_num <= 4) and count < max_attempts:
        if count > 0:
            # Pause between requests only; no need to wait after the final one.
            time.sleep(2)
        print("识别认证码中...请稍等")
        count += 1
        guess, probability, _ = baidu_image_to_word(result_path)
        # Spaces are never part of a captcha, so drop them before measuring
        # the length. (The old per-character validation ran after this strip
        # behind an "if ' ' in guess" test and therefore could never execute;
        # it has been removed.)
        guess = guess.replace(' ', '')
        words_num = len(guess)
        print('第%d次猜测验证码,猜测结果为%s,猜测验证码个数为%d,置信度平均值为%f' % (count, guess, words_num, probability))
    return guess
#///验证码识别,调用百度api
if __name__ == "__main__":
    # Script entry point: log in to the course-selection site, open the
    # general-education elective list, tick the requested course and submit
    # it together with an OCR-recognised captcha.
    #
    # Imported locally so the rest of the file is unaffected.
    # find_element(By.…) works on both Selenium 3 and 4, whereas the
    # find_element_by_* helpers used previously were removed in Selenium 4.3.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    driver.maximize_window()

    user_id = input("学号:")
    password = input("密码:")
    leibie = input("课程类别(按照选课系统官方名称):")
    # TODO: (1) fuzzy matching so a two-character abbreviation is enough;
    #       (2) support more course types;
    #       (3) derive all of this from the course name alone.
    leixing = input("课程类型(选填:一般/核心):")
    # Anything other than "一般" is treated as a core elective.
    leixing = "一般通识选修课" if leixing == "一般" else "核心通识选修课"
    classmane = input("课程名称(务必准确):")

    # ---- Login ----
    driver.get('http://***************/cas/login?service=http://jxgl.hdu.edu.cn/default.aspx')
    time.sleep(1)
    driver.get('http://**************/cas/login?service=http://jxgl.hdu.edu.cn/default.aspx')  # reload the page
    time.sleep(1)
    login_form = "/html/body/div[2]/div[2]/div[1]/div/form"
    user_box = driver.find_element(By.XPATH, login_form + "/input[1]")
    user_box.click()
    user_box.send_keys(user_id)
    time.sleep(1)
    password_box = driver.find_element(By.XPATH, login_form + "/input[2]")
    password_box.click()
    password_box.send_keys(password)
    time.sleep(1)
    driver.find_element(By.XPATH, login_form + "/a").click()
    print(driver.current_url)
    # NOTE: this message is printed unconditionally; the login result is
    # not actually verified.
    print("登陆成功")
    time.sleep(2)
    driver.get('http://jxgl.hdu.edu.cn/xs_main.aspx?xh=' + str(user_id))  # main page

    # ---- Course selection, retried up to 23 times ----
    course_table = "/html/body/form/div[4]/div/span/fieldset[1]/table/tbody"
    for attempt in range(1, 24):
        print("第" + str(attempt) + "尝试")
        time.sleep(3)
        # Top menu: li[2] is "online course selection" (li[5] would be
        # "information query").
        driver.find_element(By.XPATH, "/html/body/div/div[1]/ul/li[2]/a/span").click()
        # Sub-menu entry: 'xf_xsqxxxk' is the general-education elective list
        # ('xsxk' = ordinary theory courses, 'tjkbcx' = timetable query).
        driver.find_element(By.XPATH, "//a[contains(@href, 'xf_xsqxxxk')]").click()
        print("进入选课界面")
        time.sleep(1)
        try:
            driver.switch_to.frame("iframeautoheight")
        except WebDriverException:
            print("进入iframe失败")
            continue

        # Everything below runs inside the iframe. The finally clause makes
        # sure we always return to the top-level document, including on the
        # early `continue`s; otherwise the next attempt would look for the
        # menu inside the iframe and fail.
        try:
            time.sleep(1)
            try:
                s1 = Select(driver.find_element(By.XPATH, "/html/body/form/div[3]/div[3]/p[1]/select[1]"))
            except WebDriverException:
                print("进入Select_1失败")
                continue
            s1.select_by_value(leibie)
            time.sleep(1)
            try:
                s2 = Select(driver.find_element(By.XPATH, "/html/body/form/div[3]/div[3]/p[1]/select[3]"))
            except WebDriverException:
                print("进入Select_2失败")
                continue
            s2.select_by_visible_text(leixing)
            time.sleep(1)
            driver.find_element(By.XPATH, "/html/body/form/div[3]/div[5]/p[1]/input[2]").click()
            time.sleep(2)
            print("选择类别完成")

            # Collect every cell's text and remember the (1-based) row of the
            # first cell whose text equals the requested course name.
            rows = driver.find_element(By.XPATH, course_table).find_elements(By.TAG_NAME, 'tr')
            cell_texts = []
            match_row = None
            for row_number, row in enumerate(rows, 1):
                for cell in row.find_elements(By.TAG_NAME, 'td'):
                    text = cell.text
                    cell_texts.append(text)
                    if match_row is None and text == classmane:
                        match_row = row_number
            # TODO: this part still needs further optimisation.
            print("/")
            print(cell_texts)
            print("/")

            if match_row is not None:
                checkbox_xpath = course_table + "/tr[" + str(match_row) + "]/td[1]/input"
                print(checkbox_xpath)
                driver.find_element(By.XPATH, checkbox_xpath).click()
                time.sleep(2)
                # Scroll down so the captcha is visible in the screenshot.
                page_body = driver.find_element(By.XPATH, "/html/body")
                for _ in range(99):
                    page_body.send_keys(Keys.DOWN)
                time.sleep(3)
                driver.save_screenshot("D://AAAAtest/01.png")

                # Crop the captcha out of the screenshot. The box was located
                # by hand and is (left, upper, right, lower) in pixels.
                box = (1733, 813, 1786, 830)
                with Image.open("D://AAAAtest/01.png") as screenshot:
                    screenshot.crop(box).save("D://AAAAtest/02.png")
                mycode = get_code("D://AAAAtest/02.png")
                print(mycode)
                driver.find_element(By.XPATH, "/html/body/form/div[4]/div/div[2]/em/span[2]/span/input").send_keys(mycode)
                time.sleep(1)
                driver.find_element(By.XPATH, "/html/body/form/div[4]/div/div[2]/em/span[2]/input[1]").click()
                print("finish !!!")
        finally:
            driver.switch_to.default_content()  # leave the iframe