import requests
from lxml import etree
import ddddocr
# Simulated login to so.gushiwen.cn:
#   1. fetch the login page,
#   2. download the captcha image and recognise it with ddddocr,
#   3. POST the login form and save the returned HTML to gushiwen.html.

# Seconds to wait on each HTTP call; requests has no default timeout and
# would otherwise block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# ASP.NET hidden-field values captured from an earlier visit. They are only
# used as a fallback when the fields cannot be read from the live page.
FALLBACK_VIEWSTATE = 'ZzBQXCdDLDM4ohM27F3P5IhXnThuqN3wFBQ4yoHip4hezQkouHqDIK0E2S19KYJIDaZ4uz/lzUn10HUCasejClHb3QvBnxAIdUB4KPtXLtK2Lh9rhdP6yqYgHoe17EBtmiKxzRQQ/Ca0uGIyEOgn/2pWb2g='
FALLBACK_VIEWSTATEGENERATOR = 'C93BE1AE'

headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36"
}
url = "https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx"

# A single Session carries cookies across all three requests.
# NOTE(review): presumably the server ties the captcha to the session
# cookie, which is why the same session must be reused - confirm.
session = requests.Session()

login_page = session.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail here on an HTTP error rather than later with an opaque parse error.
login_page.raise_for_status()
page_text = login_page.text

tree = etree.HTML(page_text)
if tree is None:
    raise RuntimeError("login page could not be parsed as HTML")

code_img_paths = tree.xpath('//*[@id="imgCode"]/@src')
if not code_img_paths:
    raise RuntimeError("captcha image (id=imgCode) not found on the login page")
code_img_src = "https://so.gushiwen.cn" + code_img_paths[0]

# Read the hidden form fields from the page that was just fetched, so the
# POST carries values matching this visit instead of stale captured ones.
viewstate_values = tree.xpath('//input[@name="__VIEWSTATE"]/@value')
generator_values = tree.xpath('//input[@name="__VIEWSTATEGENERATOR"]/@value')
viewstate = viewstate_values[0] if viewstate_values else FALLBACK_VIEWSTATE
viewstate_generator = (
    generator_values[0] if generator_values else FALLBACK_VIEWSTATEGENERATOR
)

captcha_response = session.get(
    url=code_img_src, headers=headers, timeout=REQUEST_TIMEOUT
)
captcha_response.raise_for_status()
img_data = captcha_response.content

# Keep a copy of the captcha on disk for manual inspection.
with open("./code.jpg", "wb") as fp:
    fp.write(img_data)

# The image bytes are already in memory, so there is no need to re-read
# the file before handing them to the OCR engine.
ocr = ddddocr.DdddOcr()
code_text = ocr.classification(img_data)
print("结果:")
print(code_text)

login_url = "https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx"
# NOTE(review): the account credentials are hard-coded here; consider
# loading them from the environment or a config file kept out of VCS.
data = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstate_generator,
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': '2364594659@qq.com',
    'pwd': 'zhanghao',
    'code': code_text,
    'denglu': '登录'
}

login_response = session.post(
    url=login_url, headers=headers, data=data, timeout=REQUEST_TIMEOUT
)
login_page_text = login_response.text
# The status code alone does not prove the login succeeded; inspect the
# saved HTML to check.
print(login_response.status_code)

with open("gushiwen.html", "w", encoding="utf-8") as fp:
    fp.write(login_page_text)
# 模拟登录古诗文网(用免费的ddddocr识别验证码)
# 最新推荐文章于 2024-09-10 13:01:50 发布