登录常用的手段有两种:
- requests.post 请求
- selenium 等自动化工具
这里讲一下使用requests实现自动登录
网站
现在网站登录基本上都有验证码,requests登录的难点在于将验证码与账号联系起来,而联系二者的纽带就是cookie和referer。
def login_dianxiaomi(account: str = 'xx', password: str = 'xxx', max_retries: int = 3):
    """Log in to dianxiaomi.com with plain ``requests`` calls.

    The captcha is tied to the account through the cookies and the Referer
    header, so each attempt performs three requests in order:

    1. GET the home page to obtain a ``JSESSIONID`` cookie (the home page URL
       is reused as the Referer of the following requests).
    2. GET the captcha image, sending that ``JSESSIONID``; the response sets
       the ``JSESSIONID`` / ``dxm_vc`` cookies used for the login call.
    3. POST the login form together with the recognised captcha text.

    The captcha image is written to ``dianxiaomi.png`` in the current working
    directory and passed to ``get_code`` (defined outside this block;
    presumably an OCR helper returning the captcha text -- confirm).

    Args:
        account: Login account name.
        password: Login password.
        max_retries: Maximum number of login attempts.

    Returns:
        The cookies set by the successful login response, formatted as
        ``name=value;`` pairs concatenated into one string, or ``None`` when
        every attempt failed.
    """
    # Values that do not change between attempts are built once.
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36')
    login_url = 'https://www.dianxiaomi.com/user/userLoginNew2.json'
    image_path = 'dianxiaomi.png'

    for attempt in range(1, max_retries + 1):
        ts = int(time.time())
        # Home page; also used as the Referer of the captcha and login requests.
        homepage = f'https://www.dianxiaomi.com/index.htm?ts={ts}'
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # Cookie value copied directly from a browser session.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951; '
                      'JSESSIONID=857D325C586104913A42E668792A8870; dxm_vc=NjlhZDZiOGYwMzk0YzdiYjRlOWJlY2M2OWJiMGY4Y2MhMTY3MDQwMzcxMzg2NQ; '
                      'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670403714',
            'Host': 'www.dianxiaomi.com',
            'Referer': 'https://www.dianxiaomi.com/home.htm',
            'User-Agent': user_agent,
        }
        res = requests.get(homepage, headers=headers, timeout=30)
        # Look the cookie up by name instead of by position so that a missing
        # or reordered cookie cannot raise IndexError or pick the wrong value.
        jsessionid = dict(res.cookies.items()).get('JSESSIONID')
        if jsessionid is None:
            print('homepage cookie not right', f'try {attempt} times')
            continue
        print('homepage jsessionid:', jsessionid)

        # Fetch the captcha image.
        url = f'https://www.dianxiaomi.com/verify/code.htm?t={int(time.time() * 1000)}'
        headers = {
            'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # The JSESSIONID here comes from the home page response cookies.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951;'
                      f'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670402174; JSESSIONID={jsessionid}; '
                      'dxm_vc=NjlhZDZiOGYwMzk0YzdiYjRlOWJlY2M2OWJiMGY4Y2MhMTY3MDQwMzcxMzg2NQ',
            'Host': 'www.dianxiaomi.com',
            'Referer': homepage,
            'User-Agent': user_agent,
        }
        res = requests.get(url, headers=headers, timeout=30)
        cookie = res.cookies.items()
        print('code cookie:', cookie)
        code_cookies = dict(cookie)
        code_jsessionid = code_cookies.get('JSESSIONID')
        code_dxm_vc = code_cookies.get('dxm_vc')
        # Both cookies are required for the login request; retry when the
        # captcha response did not set them (covers the empty case as well).
        if code_jsessionid is None or code_dxm_vc is None:
            print('code url cookie not right', f'try {attempt} times')
            continue

        with open(image_path, 'wb') as f:
            f.write(res.content)
        # Convert the captcha to grayscale (mode 'L') before recognition.
        # The source handle is closed before the file is overwritten.
        with Image.open(image_path) as im:
            img_gray = im.convert('L')
        img_gray.save(image_path)
        code = get_code(image_path)
        print('code:', code)

        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': user_agent,
            'Referer': homepage,
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # JSESSIONID and dxm_vc here come from the captcha response cookies.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951; '
                      f'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670402174; JSESSIONID={code_jsessionid}; '
                      f'dxm_vc={code_dxm_vc}',
        }
        data = {
            'account': account,
            'password': password,
            'dxmVerify': code,
            'loginVerifyCode': '',
            # NOTE(review): 'remeber' is kept exactly as in the original form
            # data; presumably it is the field name the server expects.
            'remeber': 'remeber',
            'url': '',
        }
        response = requests.post(login_url, data=data, headers=headers, timeout=30)
        print(response.text)
        result = response.json()
        if result['code'] == -1:
            # Login rejected: report the server's error and try again.
            print(result.get('error'), f'try {attempt} times')
            continue

        # Success: serialise the cookies returned by the login response.
        cookie = response.cookies.items()
        print(cookie)
        cookies = ''.join(f'{name}={value};' for name, value in cookie)
        print(11, cookies)
        return cookies

    # Every attempt failed.
    return None
欢迎关注,爬虫王者