requests登录淘宝并访问搜索商品页面
参考:
猪哥66
零度风格
python格式化
import os
import requests
import re
from bs4 import BeautifulSoup
import time
import json
import csv
import hashlib
import codecs
def getMD5(s):
    """
    Return the hexadecimal MD5 digest of a string.

    :param s: text to hash; it is encoded as UTF-8 before hashing
    :return: 32-character lowercase hex digest
    """
    digest = hashlib.md5(s.encode('utf-8'))
    return digest.hexdigest()
def bytesToStr(bs):
    """
    Decode a percent-encoded (URL-quoted) string into text.

    The previous implementation rewrote ``%`` to ``\\x`` and ran the result
    through the undocumented ``codecs.escape_decode``, which also interpreted
    any other backslash escape present in the input and raised on a ``%`` that
    was not followed by two hex digits.  ``urllib.parse.unquote`` performs the
    same ``%XX`` -> UTF-8 decoding without those side effects.

    :param bs: percent-encoded text, e.g. ``'%E7%AC%94'``
    :return: the decoded text; malformed ``%`` sequences are left untouched
    :raises UnicodeDecodeError: if the decoded bytes are not valid UTF-8
    """
    # Imported locally so the helper stays self-contained.
    from urllib.parse import unquote

    # errors='strict' keeps the original contract of failing on invalid UTF-8
    # instead of silently inserting replacement characters.
    return unquote(bs, encoding='utf-8', errors='strict')
# Module-wide HTTP session used by every TBLogin method; its cookie jar is what
# gets saved to and restored from COOKIES_FILE_PATH.
reqS = requests.Session()
# File the session cookies are dumped to (as JSON) and later reloaded from.
COOKIES_FILE_PATH = 'D:\\taobao_login_cookies.txt'
class TBLogin:
    """
    Log in to Taobao through the shared ``reqS`` session and persist the cookies.

    Before you start:
        Open the browser DevTools (F12), log in at
        https://login.taobao.com/member/login.jhtml and copy the key
        ``login_data`` parameters from the login request, for example:
        loginId, password2, ua, _csrf_token, umidToken, hsiz.
        These parameters may change over time.

    What it does:
        Following the referenced blog posts, the flow first checks whether the
        slider captcha is required and then verifies the password.  When the
        password check succeeds the response looks roughly like:
        {"content":{"data":{"redirect":true,"redirectUrl":"https://i.taobao.com/my_taobao.htm?nekot=xxx=xxx","asyncUrls":["https://passport.alibaba.com/mini_apply_st.js?callback=callback&site=0&token=xxx"],"resultCode":100},"status":0,"success":true},"hasError":false}
        ``asyncUrls`` holds the address used to apply for the st code.
        The st code is then fetched from that address and finally used to
        log in.
        Author's note: a successful password check already seems to count as a
        login, because it redirects to the user's home page; why the st code
        has to be fetched and used for a second login is not clear.

    Open issues:
        The key to a successful login is ``ua``: only a valid, user-bound ua
        avoids the slider captcha.  The browser normally sends a POST carrying
        ua right after the account name is typed (before the password), and
        the ua value is dynamic.
        The ua can be read in the login page console with:
        window["_n"] or window[UA_Opt.LogVal]
        What is @property for?

    References:
        [猪哥66](https://blog.csdn.net/u014044812/article/details/99584382)
        That post splits the simulated login into four steps:
        1. After the user name is entered, the browser POSTs to taobao.com to
           find out whether the slider captcha is needed.
        2. After the password is entered, the browser POSTs to taobao.com
           again to verify the credentials; on success a token is returned.
        3. The browser exchanges the token for an st code at alibaba.com.
        4. The browser uses the st code to obtain cookies; login is complete.
        https://www.cnblogs.com/542684416-qq/p/11427600.html
        https://blog.csdn.net/weixin_41624982/article/details/86710995
        https://mp.weixin.qq.com/s?__biz=MzI2OTQ1NzEyMQ==&mid=2247483923&idx=1&sn=b54293568b411695dac564a6433c2016&chksm=eae1419ddd96c88bb653a1cef4b7cdbeb0ff5b983c173b4e3730f01b0dfec28d391a43c45b76&scene=21#wechat_redirect
    """

    # Timeout (seconds) for requests that originally had none, so a stalled
    # connection cannot hang the script forever.
    _DEFAULT_TIMEOUT = 10

    # Browser-like headers shared by the password check and the st-code login.
    _LOGIN_HEADERS = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Content-Type': 'application/x-www-form-urlencoded',
        "origin": "https://login.taobao.com",
        "referer": "https://login.taobao.com/member/login.jhtml",
        "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
    }

    def __init__(self, loginId, password2, ua, _csrf_token, umidToken, hsiz):
        """
        Create a login object for one account.

        :param loginId: user name
        :param password2: the already-encrypted password
        :param ua: Taobao's ``ua`` parameter
        :param _csrf_token: _csrf_token
        :param umidToken: parameter added by the newer login page
        :param hsiz: hsiz
        """
        self.user_check_url = "https://login.taobao.com/member/request_nick_check.do?_input_charset=utf-8"
        self.verify_password_url = "https://login.taobao.com/newlogin/login.do?appName=taobao&fromSite=0"
        # Placeholder; replaced by verify_password() with the real address.
        self.apply_st_url = "https://passport.alibaba.com/mini_apply_st.js?callback=callback&site=0&token=xxx"
        self.vst_url = 'https://login.taobao.com/member/vst.htm?st={}'
        self.my_taobao_url = 'http://i.taobao.com/my_taobao.htm'
        # st code; filled in by apply_st().
        self.stma = ""
        self.loginId = loginId
        self.password2 = password2
        self.ua = ua
        self._csrf_token = _csrf_token
        self.umidToken = umidToken
        self.hsiz = hsiz

    def user_check_vcode(self):
        """
        Ask Taobao whether this account has to pass the slider captcha.

        Any failure of the request or of parsing its JSON reply is treated as
        "captcha required".

        :return: the ``needcode`` value of the reply (truthy when the captcha
            is required), or True when the check itself failed
        """
        print("调用账户是否需要滑动验证码接口(True:需要/False:不需要)")
        data = {
            "username": self.loginId,
            'ua': self.ua
        }
        try:
            user_check_response = reqS.post(
                self.user_check_url, data=data, timeout=self._DEFAULT_TIMEOUT)
            user_check_result = user_check_response.json()["needcode"]
            print("返回结果为: %s" % user_check_result)
        except Exception as e:
            print("用户验证接口请求失败,msg:{}".format(e))
            user_check_result = True
        # Only announce the captcha when one is actually required.
        if user_check_result:
            print("需要滑动验证,搞不定!休息10秒再来一次试试")
        return user_check_result

    def get_umidToken(self):
        """
        Scrape the ``umidToken`` value from the login page.

        :return: the umidToken string
        :raises RuntimeError: if the login page does not contain a umidToken
        """
        response = reqS.get('https://login.taobao.com/member/login.jhtml',
                            timeout=self._DEFAULT_TIMEOUT)
        umidToken_match = re.search(r'"umidToken":"(.*?)"', response.text)
        if umidToken_match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError('获取umidToken失败')
        umid_token = umidToken_match.group(1)
        print("umidToken: ", umid_token)
        return umid_token

    def verify_password(self):
        """
        Verify the account credentials and remember the st-code address.

        On success ``self.apply_st_url`` is replaced by the first entry of
        ``asyncUrls`` in the reply.

        :return: True when an st-code address was obtained, otherwise False
        """
        verify_ans = False
        # NOTE(review): several values below start with a space, presumably
        # copied verbatim from DevTools. They are kept unchanged; confirm that
        # the server tolerates them.
        login_data = {
            'loginId': self.loginId,
            'password2': self.password2,
            'keepLogin':'false',
            'ua': self.ua,
            'umidGetStatusVal':' 255',
            'screenPixel':' 1536x864',
            'navlanguage':' zh-CN',
            "navUserAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36",
            'navPlatform':' Win32',
            'appName':' taobao',
            'appEntrance':' taobao_pc',
            '_csrf_token': self._csrf_token,
            'umidToken': self.umidToken,
            'hsiz': self.hsiz,
            'style':' default',
            'appkey':' 00000000',
            'from':' tb',
            'isMobile':' false',
            'lang':' zh_CN',
            'returnUrl':' http://i.taobao.com/my_taobao.htm',
            'fromSite':' 0',
            'bx-ua': 'xxx',
            'bx-umidtoken': 'xxx'
        }
        try:
            r = reqS.post(self.verify_password_url, headers=dict(self._LOGIN_HEADERS),
                          data=login_data, timeout=3)
            cookieJar = reqS.cookies
            cookieDict = requests.utils.dict_from_cookiejar(cookieJar)
            print(r.status_code, r.request.url)
            print(cookieJar)
            print(cookieDict)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            print("验证密码返回内容:")
            print(r.text)
            self.apply_st_url = r.json()['content']['data']['asyncUrls'][0]
            if self.apply_st_url:
                verify_ans = True
                print("验证密码并获取申请st码地址成功。")
            else:
                print("未获取到申请st码地址。")
        except Exception as e:
            # Covers request errors and a reply without the expected JSON
            # structure (e.g. a rejected login); report the cause.
            verify_ans = False
            print("验证密码并获取申请st码地址出现bug。msg:{}".format(e))
        return verify_ans

    def apply_st(self):
        """
        Fetch the st code from ``self.apply_st_url`` and store it in ``self.stma``.

        :return: True when a non-empty st code was obtained, otherwise False
        """
        ast_flag = False
        try:
            r = reqS.get(self.apply_st_url, timeout=self._DEFAULT_TIMEOUT)
            r.raise_for_status()
            st_match = re.search(r'"data":{"st":"(.*?)"}', r.text)
            if st_match is None:
                raise RuntimeError('获取st码失败')
            self.stma = st_match.group(1)
            if not self.stma:
                raise RuntimeError('获取st码失败')
            ast_flag = True
            print('获取st码成功,st码:{}'.format(self.stma))
        except Exception as e:
            print('申请st码请求失败!msg:{}'.format(e))
            ast_flag = False
        return ast_flag

    def login_with_stma(self):
        """
        Run the full login: captcha check, password check, st code, st login.

        On success the redirect target found in the reply is stored in
        ``self.my_taobao_url`` and the session cookies are written to disk.

        :return: True when the login succeeded, otherwise False
        """
        # Guard clauses: each step must succeed before the next one runs.
        if self.user_check_vcode():
            return False
        if not self.verify_password():
            return False
        if not self.apply_st():
            return False
        login_result = False
        try:
            url = self.vst_url.format(self.stma)
            print('vst login url:')
            print(url)
            response = reqS.get(url, headers=dict(self._LOGIN_HEADERS), timeout=3)
            response.raise_for_status()
            print("response.text:")
            print(response.text)
            print('--end--')
            my_taobao_match = re.search(r'top.location.href = "(.*?)"', response.text)
            if my_taobao_match is None:
                raise RuntimeError('未找到跳转链接')
            print('登录淘宝成功,跳转链接:{}'.format(my_taobao_match.group(1)))
            self.my_taobao_url = my_taobao_match.group(1)
            login_result = True
            self.serialization_cookies()
        except Exception as e:
            print('登录淘宝失败,{}.'.format(e))
            login_result = False
        return login_result

    def serialization_cookies(self):
        """
        Dump the session cookies as a JSON dict into COOKIES_FILE_PATH.

        :return: None
        """
        cookies_dict = requests.utils.dict_from_cookiejar(reqS.cookies)
        print('cookie 如下:')
        print(reqS.cookies)
        print(cookies_dict)
        # Plain write mode is enough; the file is never read back here.
        with open(COOKIES_FILE_PATH, 'w', encoding='utf-8') as file:
            json.dump(cookies_dict, file)
        print('保存cookies文件成功!')

    def deserialization_cookies(self):
        """
        Load the cookie dict from COOKIES_FILE_PATH and turn it into a cookie jar.

        :return: a cookie jar built from the saved dict
        """
        # Read-only mode so a write-protected cookie file can still be loaded.
        with open(COOKIES_FILE_PATH, 'r', encoding='utf-8') as file:
            cookies_dict = json.load(file)
        return requests.utils.cookiejar_from_dict(cookies_dict)

    def load_cookies(self):
        """
        Restore previously saved cookies into the session and validate them.

        The cookies are considered valid when the nickname can be read from
        the my-taobao page.  If that fails the cookie file is deleted.

        :return: True when the saved cookies are usable, otherwise False
        """
        if not os.path.exists(COOKIES_FILE_PATH):
            return False
        reqS.cookies = self.deserialization_cookies()
        try:
            # Must use the raising helper: get_taobao_nick_name() swallows
            # every error, so stale cookies would never be detected.
            self._fetch_nick_name()
        except Exception:
            os.remove(COOKIES_FILE_PATH)
            print('cookies过期,删除cookies文件!')
            return False
        print('加载淘宝cookies登录成功!!!')
        return True

    def _fetch_nick_name(self):
        """
        Fetch the my-taobao page and extract the nickname, raising on failure.

        :return: the Taobao nickname
        :raises RuntimeError: if the page does not contain the nickname field
        :raises Exception: any error raised by the HTTP request itself
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
        }
        response = reqS.get(self.my_taobao_url, headers=headers,
                            timeout=self._DEFAULT_TIMEOUT)
        response.raise_for_status()
        nick_name_match = re.search(r'<input id="mtb-nickname" type="hidden" value="(.*?)"/>', response.text)
        if not nick_name_match:
            raise RuntimeError('获取淘宝昵称失败!')
        print('登录淘宝成功,你的用户名是:{}'.format(nick_name_match.group(1)))
        return nick_name_match.group(1)

    def get_taobao_nick_name(self):
        """
        Try to read the Taobao nickname through the shared session.

        The main purpose is to find out whether the session holds valid
        cookies.  This method never raises.

        :return: the Taobao nickname, or the failure message string when it
            could not be obtained
        """
        try:
            return self._fetch_nick_name()
        except Exception:
            print('获取淘宝昵称失败!')
            return '获取淘宝昵称失败!'

    def testSessionCookie(self):
        """
        Check whether the saved cookies can open a Taobao search page.

        Reads the cookie dict written by serialization_cookies(), sends it as a
        raw ``cookie`` header with a plain ``requests.get`` and prints the reply.

        :return: None
        """
        with open(COOKIES_FILE_PATH, 'r', encoding='utf-8') as file:
            dic = json.load(file)
        # Every pair is followed by ';', including the last one.
        cookieStr = ''.join('{}={};'.format(key, value) for key, value in dic.items())
        print(cookieStr)
        print("")
        headers = {
            "user-agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
            "referer": "https://s.taobao.com/",
            'cookie': cookieStr
        }
        testurl = "https://s.taobao.com/search?q=笔记本"
        try:
            r = requests.get(testurl, headers=headers, timeout=3)
            print(r.status_code, r.encoding, r.apparent_encoding)
            print(r.request.url, r.url)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            print(r.text)
        except Exception as e:
            print("test fail,msg:{}".format(e))

    def test(self):
        """
        Smoke test: fetch the umidToken, then try to read the nickname.

        :return: None
        """
        gu = self.get_umidToken()
        print(gu)
        self.get_taobao_nick_name()
def main():
    """
    Build a ``TBLogin`` from hand-captured parameters and run its smoke test.

    Replace the placeholders below with your own values, captured from the
    browser DevTools while logging in to Taobao.

    :raises RuntimeError: if any placeholder has not been filled in
    """
    # Your own parameters go here. The original referenced these names without
    # defining them, so the script always died with a NameError.
    loginId = ''
    password2 = ''
    ua = ''
    _csrf_token = ''
    umidToken = ''
    hsiz = ''
    if not all((loginId, password2, ua, _csrf_token, umidToken, hsiz)):
        raise RuntimeError('请先在main()中填写自己的登录参数')
    tbl = TBLogin(loginId, password2, ua, _csrf_token, umidToken, hsiz)
    tbl.test()


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == '__main__':
    main()