以下两种方式都使用了百度文字识别(OCR)API 接口,需要指导的可以去 这里,运行时只需把代码中的“****”部分替换为自己的账号、密码和 API 密钥即可。
selenium
selenium 的测试功能可以模拟人进行浏览器操作,这个方法比较简单实用,但是忽忽悠的网页是用各种 Frame 搭建的,如果用这个方法爬取数据会比较麻烦,要去抓包找到对应的 request URL 才行,我嫌烦就直接把查询结果截图保存了,下面贴出代码:
import time
from selenium import webdriver
from aip import AipOcr
import re
class sele_spider:
    """Log in to the academic-affairs site with headless Chrome and save screenshots.

    The login form is protected by a captcha image. The captcha is
    screenshotted to ``yzm.png``, recognised with Baidu OCR (``AipOcr``) and
    submitted together with the account and password. After a successful
    login, ``ask_grades`` saves screenshots of three result pages.

    Replace every ``"****"`` placeholder below with real values before use.
    Call ``close()`` (or use the instance as a context manager) when finished
    so the browser process is shut down.
    """

    homeurl = "http://202.119.113.147/"
    # Site login credentials.
    account = "****"
    password = "****"
    # Baidu OCR credentials passed to AipOcr(app_id, api_key, secret_key).
    AppID = "****"
    API = "****"
    secret = "****"

    def __init__(self):
        """Start a headless Chrome session with a fixed window size."""
        option = webdriver.ChromeOptions()
        option.add_argument('headless')
        self.driver = webdriver.Chrome(options=option)
        self.driver.set_window_size(1920, 900)
        self.driver.maximize_window()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Quit the browser so the Chrome process does not linger."""
        self.driver.quit()

    def get_yzm(self):
        """Open the home page and save the captcha element as ``yzm.png``."""
        self.driver.get(self.homeurl)
        # find_element(by, value) exists in Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.3.
        png = self.driver.find_element("id", "vchart")
        png.screenshot('yzm.png')

    @staticmethod
    def _extract_yzm(result):
        """Return the alphanumeric captcha text contained in an OCR response.

        All recognised fragments in ``result['words_result']`` are joined and
        every non-alphanumeric character (whitespace, punctuation, ``_``) is
        removed. A response without usable ``words_result`` entries (for
        example an API error payload) yields ``""`` instead of raising.
        """
        fragments = result.get('words_result') or []
        text = ''.join(item.get('words', '') for item in fragments)
        return re.sub(r'[\W_]+', '', text)

    def discern_yzm(self):
        """Recognise the captcha stored in ``yzm.png`` and return its text.

        Returns:
            The cleaned captcha string; empty when OCR recognised nothing.
        """
        client = AipOcr(self.AppID, self.API, self.secret)
        with open('yzm.png', 'rb') as f:
            image = f.read()
        # Recognition language; the service default is 'CHN_ENG' (mixed).
        options = {'language_type': 'ENG'}
        # General OCR endpoint; basicAccurate is the high-accuracy variant.
        result = client.basicGeneral(image, options)
        return self._extract_yzm(result)

    def send_keys(self, yzm):
        """Fill in account, password and captcha, then submit the login form."""
        self.driver.find_element("name", "zjh").send_keys(self.account)
        self.driver.find_element("name", "mm").send_keys(self.password)
        self.driver.find_element("name", "v_yzm").send_keys(yzm)
        self.driver.find_element("id", "btnSure").click()

    def login(self, max_retries=None):
        """Retry the captcha login until the post-login page title appears.

        Args:
            max_retries: Maximum number of attempts. ``None`` (the default)
                retries without limit, as the original code did.

        Raises:
            RuntimeError: If ``max_retries`` attempts all fail.
        """
        attempt = 0
        while max_retries is None or attempt < max_retries:
            attempt += 1
            self.get_yzm()
            yzm = self.discern_yzm()
            self.send_keys(yzm)
            if "个人管理" in self.driver.title:
                print("登录成功!")
                return
            time.sleep(0.5)
            print("验证码错误,正在重新登录~~~")
        raise RuntimeError("login failed after %d attempts" % attempt)

    def ask_grades(self):
        """Log in, then screenshot three pages to grades/rank/schedule PNGs."""
        self.login()
        pages = (
            ("http://202.119.113.147/bxqcjcxAction.do", "grades.png"),
            ("http://202.119.113.147/reportFiles/bzrcx/jdpmcx.jsp?temp=1", "rank.png"),
            ("http://202.119.113.147/xkAction.do?actionType=6", "schedule.png"),
        )
        for url, filename in pages:
            self.driver.get(url)
            self.driver.get_screenshot_as_file(filename)
        print("查询成功!")
requests
Requests 唯一的一个非转基因的 Python HTTP 库,人类可以安全享用。
警告:非专业使用其他 HTTP 库会导致危险的副作用,包括:安全缺陷症、冗余代码症、重新发明轮子症、啃文档症、抑郁、头疼、甚至死亡。
直接贴代码吧,想学习直接去看 官方文档
import re
import requests
import configparser
from aip import AipOcr
class req_spider:
    """Log in to the academic-affairs site through a ``requests`` session.

    The captcha image is downloaded to ``yzm.png``, recognised with Baidu OCR
    (``AipOcr``) and posted together with the account and password. The
    session keeps the cookies, so the same ``self.session`` can be used for
    further requests after ``login()`` succeeds.

    Replace every ``"****"`` placeholder below with real values before use.
    """

    homeurl = "http://202.119.113.147/"
    # Site login credentials.
    account = "****"
    password = "****"
    # Baidu OCR credentials passed to AipOcr(app_id, api_key, secret_key).
    AppID = "****"
    API = "****"
    secret = "****"
    # Seconds to wait for each HTTP request; without it a stalled
    # connection would block forever.
    timeout = 10

    def __init__(self):
        """Prepare request headers, endpoint URLs and the shared session."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
        }
        self.post_url = self.homeurl + 'loginAction.do'
        self.yzm_url = self.homeurl + 'validateCodeAction.do'
        self.session = requests.Session()

    def get_yzm(self):
        """Download the captcha image with the session and save it as ``yzm.png``.

        Raises:
            requests.RequestException: On timeout, connection failure or a
                non-success HTTP status.
        """
        # Fetch first so a failed request does not leave an empty file behind.
        response = self.session.get(
            self.yzm_url, headers=self.headers, timeout=self.timeout
        )
        response.raise_for_status()
        with open('yzm.png', 'wb') as f:
            f.write(response.content)

    @staticmethod
    def _extract_yzm(result):
        """Return the alphanumeric captcha text contained in an OCR response.

        All recognised fragments in ``result['words_result']`` are joined and
        every non-alphanumeric character (whitespace, punctuation, ``_``) is
        removed. A response without usable ``words_result`` entries (for
        example an API error payload) yields ``""`` instead of raising.
        """
        fragments = result.get('words_result') or []
        text = ''.join(item.get('words', '') for item in fragments)
        return re.sub(r'[\W_]+', '', text)

    def discern_yzm(self):
        """Recognise the captcha stored in ``yzm.png`` and return its text.

        Returns:
            The cleaned captcha string; empty when OCR recognised nothing.
        """
        client = AipOcr(self.AppID, self.API, self.secret)
        with open('yzm.png', 'rb') as f:
            image = f.read()
        options = {'language_type': 'ENG'}
        # General OCR endpoint; basicAccurate is the high-accuracy variant.
        result = client.basicGeneral(image, options)
        return self._extract_yzm(result)

    def login(self, max_retries=None):
        """Retry the captcha login until the response shows the logged-in page.

        Args:
            max_retries: Maximum number of attempts. ``None`` (the default)
                retries without limit, as the original code did.

        Raises:
            RuntimeError: If ``max_retries`` attempts all fail.
        """
        attempt = 0
        while max_retries is None or attempt < max_retries:
            attempt += 1
            self.get_yzm()
            yzm = self.discern_yzm()
            post_data = {
                'zjh': self.account,
                'mm': self.password,
                'v_yzm': yzm,
            }
            loginResponse = self.session.post(
                self.post_url,
                data=post_data,
                headers=self.headers,
                timeout=self.timeout,
            )
            if "学分制综合教务" in loginResponse.text:
                print("登录成功!")
                return
            print("正在重新登录")
        raise RuntimeError("login failed after %d attempts" % attempt)