声明:本文只作学习研究,禁止用于非法用途,否则后果自负,如有侵权,请告知删除,谢谢!
主页视频那么多,想找这个人剪辑的火影忍者视频要刷好久,于是研究一下主页视频接口的 sign 参数。
_m_h5_tk 和 _m_h5_tk_enc
对于直接下拉(翻页)的请求,没有什么加密,直接拼接参数即可:可以忽略 _m 开头的参数(_m_h5_tk / _m_h5_tk_enc),只需要 pageNo/uid/time_stamp。
def get_parms_nokeywords(userId="", pcursor=1, _m_h5_tk="", _m_h5_tk_enc="", token="", time_stamp_="", timeout=10):
    """Fetch one page of a user's profile videos via plain paging (no signature).

    The request carries no signed parameter; only the page number, the user
    id and a timestamp vary between calls.

    Args:
        userId: User id sent as the ``uid`` query parameter
            (e.g. ``'UMTIzMzg2MjU5NjA='``).
        pcursor: Page number sent as ``pageNo``.
        _m_h5_tk: Not used by this request; kept so existing callers that
            pass it keep working.
        _m_h5_tk_enc: Not used by this request; kept for the same reason.
        token: Not used by this request; kept for the same reason.
        time_stamp_: Value sent as the ``_`` query parameter
            (e.g. ``'1635412487834'``).
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection blocks forever.

    Returns:
        The response body as text. The request asks for the callback name
        ``xyy``, so the body is presumably JSONP wrapped in ``xyy(...)`` --
        confirm against a live response.
    """
    params = (
        ('type', 'video'),
        ('pageNo', pcursor),
        ('nextSession',
         '{"subIndex":192,"trackInfo":{"parentdrawerid":"4433"},"spmA":"miniapp","spmC":"drawer2","spmB":"homepage","index":2,"pageName":"page_miniapp","scene":"home_page_component_paging","scmB":"rcmd","path":"24776,4433,4432,5426","scmA":"20140689","scmC":"24776","id":24776}'),
        ('uid', userId),
        ('isGray', '0'),
        ('extend', '{}'),
        ('_', time_stamp_),
        ('callback', 'xyy'),
    )
    response = requests.get(
        'https://www.youku.com/profile/profile-data',
        headers=HEADERS,
        params=params,
        timeout=timeout,
    )
    return response.text
对于带搜索关键词的请求,出现了加密参数 sign 👇
简单说一下这个参数的加密:需要 token/time_stamp_/data,再加上固定的 appKey,用 & 拼接成 token&time_stamp_&appKey&data 后做 md5。
# 作者主页
def get_youkusign(token, time_stamp_, data, appk="23774304"):
    """Build the ``sign`` value for a signed search request.

    The four inputs are joined with ``&`` in the order
    ``token&time_stamp_&appk&data`` and the result is passed to ``md5_use``.

    Args:
        token: Obtained from the first request; fixed for that session.
        time_stamp_: Obtained from the first request; fixed for that session.
        data: The request payload being signed; changes per request.
        appk: Application key; a fixed value.

    Returns:
        Whatever ``md5_use`` returns for the joined string (presumably the
        hex MD5 digest -- confirm against ``md5_use``).
    """
    sign_source = f"{token}&{time_stamp_}&{appk}&{data}"
    return md5_use(sign_source)
请求 👇 注意里面的几个坑:_m_h5_tk/_m_h5_tk_enc/token/time_stamp 的获取需要注意;参数里的作者 ID 需要先进行转换(eid 转 uid)。关于某酷的视频 ID、用户 ID,我在「yk酷eid转换」一文里有介绍。
@retry(stop_max_attempt_number=9, wait_fixed=20)
def get_parms_keywords(userId="", pcursor=1, keyword="", _m_h5_tk="", _m_h5_tk_enc="", token="", time_stamp_="", timeout=10):
    """Search a user's profile videos by keyword using a signed request.

    The JSON payload ``data`` is built first, then signed with
    ``get_youkusign(token, time_stamp_, data)``; the signature and the exact
    same ``data`` string are sent together, so the payload must not be
    altered after signing.

    Args:
        userId: Encoded user id; converted with ``eid2uid`` before use.
        pcursor: Page number, sent as ``pg`` inside the payload.
        keyword: Search keyword; JSON-escaped before being embedded.
        _m_h5_tk: Value for the ``_m_h5_tk`` cookie.
        _m_h5_tk_enc: Value for the ``_m_h5_tk_enc`` cookie.
        token: Token used as the first component of the signature.
        time_stamp_: Timestamp sent as ``t`` and used in the signature.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection blocks forever
            and the surrounding ``@retry`` never gets a chance to retry.

    Returns:
        The response body as text. The request asks for JSONP with the
        callback name ``xyy``.
    """
    # Function-scope import so the block does not depend on the file header.
    import json

    # One value feeds both the signature and the appKey query parameter;
    # the two must agree or the signature will not match.
    app_key = '23774304'

    converted_uid = eid2uid(userId)

    # json.dumps yields valid JSON string escapes: \uXXXX (surrogate pairs
    # for non-BMP characters) and escaped double quotes. The previous
    # 'unicode-escape' codec produced \xNN and \UXXXXXXXX, which JSON does
    # not allow, and left '"' unescaped. For ASCII and CJK keywords both
    # give the same bytes, so existing signatures are unchanged.
    escaped_keyword = json.dumps(keyword)[1:-1]

    data = '{"searchType":1,"pg":%s,"pz":20,"site":1,"appCaller":"pc_user","appScene":"user_page_search","sdkver":315,"aaid":"2a0b27ad1b05f550018adec45675bb41","utdId":"%s","searchFrom":1,"sourceFrom":"home","userAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36","userType":"guest","userId":"","keyword":"%s","sceneContentId":"%s"}'
    data = data % (pcursor, converted_uid, escaped_keyword, converted_uid)

    sign_str = get_youkusign(token, time_stamp_, data, appk=app_key)

    cookies = {
        '_m_h5_tk': _m_h5_tk,
        '_m_h5_tk_enc': _m_h5_tk_enc,
    }
    params = (
        ('jsv', '2.4.2'),
        ('appKey', app_key),
        ('t', time_stamp_),
        ('sign', sign_str),
        ('api', 'mtop.youku.soku.yksearch'),
        ('v', '2.0'),
        ('dataType', 'jsonp'),
        ('jsonpIncPrefix', '1635155898727'),
        ('type', 'jsonp'),
        ('callback', 'xyy'),
        ('data', data),
    )
    response = requests.get(
        'https://acs.youku.com/h5/mtop.youku.soku.yksearch/2.0/',
        headers=HEADERS,
        params=params,
        cookies=cookies,
        timeout=timeout,
    )
    return response.text
PS:_m_h5_tk/_m_h5_tk_enc/token/time_stamp 每次都要实时获取最新的值,不然就会触发风控,和 TB 一个样。这里贴的是一个阉割过的、不带个人信息的版本 (0.0、)
以上,就完成了对主页视频的采集。
欢迎👏关注我的GitHub 欢迎star
我会分享一些平时的爬虫小例子 我们一起讨论