一般网站在登录时都需要输入验证码。下面的示例先将验证码图片下载到本地,再由用户手动输入,从而模拟登录。请求时使用同一个会话(Session),是为了保证获取到的验证码、表单令牌等数据相互一致。
import requests
from bs4 import BeautifulSoup
# Request headers passed along with the session's GET calls: a desktop
# Chrome User-Agent string, split over two lines only for readability.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
        ' (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    ),
}
def download_code(s, url, save_path="yan.jpg"):
    """Download the captcha image embedded in the page at ``url``.

    The page is fetched through the session ``s``, the element with
    ``id="imgCode"`` is looked up, and the image its ``src`` points to is
    downloaded through the same session and written to ``save_path``.

    Args:
        s: Session-like object whose ``get(url, headers=...)`` returns a
            response exposing ``content``, ``encoding``,
            ``apparent_encoding``, ``url`` and ``raise_for_status()``
            (e.g. a ``requests.Session``).
        url: Address of the page that embeds the captcha image.
        save_path: File the raw image bytes are written to. Defaults to
            ``"yan.jpg"`` in the current working directory.

    Raises:
        ValueError: If the page has no ``imgCode`` element carrying a
            ``src`` attribute.
        Exception: Whatever ``raise_for_status()`` raises for an HTTP error
            response (``requests.HTTPError`` for requests sessions).
    """
    from urllib.parse import urljoin

    resp = s.get(url, headers=headers)
    # Fail early on an HTTP error instead of parsing an error page.
    resp.raise_for_status()

    # ``encoding`` is None when the server declares no charset, and
    # ``bytes.decode(None, ...)`` raises TypeError, so fall back to the
    # detected encoding and finally to UTF-8.
    encoding = resp.encoding or resp.apparent_encoding or 'utf-8'
    text = resp.content.decode(encoding, 'ignore')
    html = BeautifulSoup(text, 'lxml')

    # Locate the captcha <img>; report a clear error if the page layout
    # is not what we expect rather than failing on ``None[...]``.
    img_tag = html.find(id='imgCode')
    img_path = img_tag.get('src') if img_tag is not None else None
    if not img_path:
        raise ValueError(
            "captcha image element with id 'imgCode' not found at %s" % url
        )

    # Resolve the link against the page that was actually served, so that
    # root-relative, relative and absolute ``src`` values all work.
    img_src = urljoin(resp.url, img_path)

    # Download the image through the same session and store the raw bytes.
    img = s.get(img_src, headers=headers)
    # Do not save an HTTP error body as if it were the captcha image.
    img.raise_for_status()
    with open(save_path, 'wb') as f:
        f.write(img.content)
def get_vic(s):
url = "https://so.gushiwen.org/user/login.aspx"
# 获取表单登录令牌
resp = s.get(url, headers&#