Python 爬虫学习(待完成)
记录时间:2021年2月7日
学习视频观看地址:https://www.bilibili.com/video/BV1Yh411o7Sz?p=55
需求1:12306模拟登录
在这个操作时,我们需要识别登录页面上的图片验证码(原文此处为验证码示例图片),
这类验证码需要使用第三方平台进行识别(例如:超级鹰)。
超级鹰接口部分的代码简单测试:
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The account password is never stored in clear text: only its MD5 hex
    digest is kept and sent to the service as the ``pass2`` form field.
    """

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the credentials and the request defaults.

        Args:
            username: Chaojiying account name.
            password: Account password as ``str``; it is UTF-8 encoded and
                hashed with MD5 before being stored.
            soft_id: Software ID sent as the ``softid`` form field.
            timeout: Seconds passed to ``requests.post`` as ``timeout``.
                Without it a stalled server would block the caller forever.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Form fields that accompany every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        Args:
            im: Raw bytes of the image.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html

        Returns:
            The response body parsed by ``Response.json()``.

        Raises:
            requests.exceptions.Timeout: If the server does not answer
                within ``self.timeout`` seconds.
        """
        # base_params is merged last, exactly like the former dict.update().
        params = {'codetype': codetype, **self.base_params}
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON response.

        Args:
            im_id: Image ID of the captcha being reported.

        Returns:
            The response body parsed by ``Response.json()``.

        Raises:
            requests.exceptions.Timeout: If the server does not answer
                within ``self.timeout`` seconds.
        """
        params = {'id': im_id, **self.base_params}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
if __name__ == '__main__':
    # Manual smoke test of the Chaojiying interface.
    # Arguments: username, password, software ID.
    chaojiying = Chaojiying_Client('xxxxxxxx', 'xxxxxxxx', 'xxxxx')
    # Read the local image inside a context manager so the file handle is
    # closed even if reading fails.
    with open('b.jpg', 'rb') as image_file:
        im = image_file.read()
    # 9004 is the captcha type code; 'pic_str' is the field of the JSON
    # response that is printed.
    print(chaojiying.PostPic(im, 9004)['pic_str'])
另外,由于再次请求验证码图片的地址会导致验证码刷新,所以不能直接下载图片,而要采取对页面截图再裁剪的方式获取验证码图片。
先写一半,后面的还没测试出来,搞懂了再补充
from selenium import webdriver
from PIL import Image
import time
from 超级鹰接口.chaojiying import Chaojiying_Client
if __name__ == '__main__':
    # Open the 12306 login page in Edge.
    edge = webdriver.Edge('./msedgedriver.exe')
    edge.maximize_window()
    edge.get('https://kyfw.12306.cn/otn/resources/login.html')
    time.sleep(2)
    # NOTE(review): presumably this link switches to the account-login tab
    # that shows the image captcha - confirm against the live page.
    switch_btn = edge.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a')
    switch_btn.click()

    # Get the captcha image by taking a screenshot of the page.
    time.sleep(2)
    screenshot_path = './12306模拟登录/1.png'
    imgCode_path = './12306模拟登录/imgCode.png'
    edge.save_screenshot(screenshot_path)

    # Bounding box of the captcha element: (left, top, right, bottom).
    # NOTE(review): element coordinates and screenshot pixels may differ on
    # displays with a scaling factor other than 100% - verify the crop.
    imgCode = edge.find_element_by_xpath('//*[@id="J-loginImg"]')
    location = imgCode.location
    size = imgCode.size
    rangle = (
        int(location['x']),
        int(location['y']),
        int(location['x']+size['width']),
        int(location['y']+size['height']),
    )

    # Crop and save while the screenshot is still open; the context manager
    # closes the underlying file afterwards.
    with Image.open(screenshot_path) as i:
        frame = i.crop(rangle)
        frame.save(imgCode_path)

    # Submit the cropped captcha for recognition.
    # Arguments: username, password, software ID.
    chaojiying = Chaojiying_Client('xxxxxxxx', 'xxxxxxxx', 'xxxxx')
    with open(imgCode_path, 'rb') as image_file:
        im = image_file.read()
    # 9004 is the captcha type code. The original notes describe the result
    # as the recognised coordinates; it is not consumed yet in this script.
    result = chaojiying.PostPic(im, 9004)['pic_str']