Recently, while browsing Kuaishou, I noticed that some videos I wanted to save could not be downloaded, so I spent a day or two digging into the site. Given a user's Kuaishou ID (the eid works just as well), you can download all of that user's public videos (private videos require the user's token, which you cannot get without logging in). Some users have no Kuaishou ID, only a numeric user id, so at the end of this post I include a snippet that looks up a user's eid from the numeric user id.
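Before the full script, here is the core request in isolation: a single POST to the GraphQL endpoint at live.kuaishou.com asking for a user's publicFeeds. This is only a minimal sketch; it trims the field list down from the query used in the full script below, and whether the endpoint still answers requests like this without extra cookies is not guaranteed.

import requests

# Minimal sketch: fetch a few public feeds for one user and print the video URLs.
# 'Lijing1997' is one of the sample Kuaishou IDs used in the full script below.
query = {
    "operationName": "RecoFeeds",
    "variables": {"principalId": "Lijing1997", "pcursor": "", "count": 10},
    "query": "query RecoFeeds($principalId: String, $pcursor: String, $count: Int) {\n"
             " publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n"
             " pcursor\n list {\n photoId\n caption\n playUrl\n __typename\n }\n __typename\n }\n}\n"
}
resp = requests.post('https://live.kuaishou.com/graphql',
                     headers={'User-Agent': 'Mozilla/5.0', 'Referer': 'https://live.kuaishou.com'},
                     json=query, verify=False)
for feed in resp.json()['data']['publicFeeds']['list']:
    print(feed['caption'], feed['playUrl'])

The full script below wraps this same request with directory handling and file downloads for posters, videos, and image feeds.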
import os
import re
import json
import requests
# ID:Lijing1997
# ID: h990901h
# ID: 8319075(eid:3xucs5xtvau5jdg)
# ID: Heniao__
USER_IDS = ['Lijing1997']  # list of Kuaishou IDs (eids also work)
SAVE_PATH = 'C:\\Users\\Lay\\Desktop'  # root directory where downloaded feeds are saved
# Request headers
headers = {
    'Proxy-Connection': 'keep-alive',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'Referer': 'https://live.kuaishou.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
}
user_num = 1
for USER_ID in USER_IDS:
    # GraphQL query for the user's public feeds
    user_public_feeds_query_string = {
        "operationName": "RecoFeeds",
        "variables": {"principalId": USER_ID, "pcursor": "", "count": 999},
        "query": "query RecoFeeds($principalId: String, $pcursor: String, $count: Int) {\n publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n pcursor\n list {\n user {\n id\n profile\n name\n __typename\n }\n photoId\n caption\n poster\n likeCount\n playUrl\n imgUrls\n imgSizes\n width\n height\n expTag\n __typename\n }\n __typename\n }\n}\n"}
    # Request the user's public feeds
    r_graphql = requests.post('https://live.kuaishou.com/graphql', headers=headers, json=user_public_feeds_query_string, verify=False)
    user_public_feeds_data = json.loads(r_graphql.content.decode(encoding='utf-8', errors='strict'))['data']
    feeds_path = os.path.join(SAVE_PATH, 'NetFinder')
    print('Downloading: {user_id}......'.format(user_id=USER_ID))
    # Iterate over the user's feed list
    feed_num = 1
    for feed in user_public_feeds_data['publicFeeds']['list']:
        # Replace whitespace and characters not allowed in file names with commas;
        # [0:99] keeps the caption well under the 255-character Windows file-name limit
        feed['caption'] = re.sub(r'[\/\\\:\*\?\"\<\>\|\s]', ',', feed['caption'])[0:99]
        if feed['caption'] == '...':
            feed['caption'] = 'untitled'
        elif '.' in feed['caption']:
            feed['caption'] = feed['caption'].replace('.', '#')  # assign the result; str.replace() does not modify in place
        # Create a directory for each feed
        feed_path = os.path.join(feeds_path, feed['user']['name']+'({user_id})'.format(user_id=USER_ID), feed['caption'])
        if not os.path.exists(feed_path):
            os.makedirs(feed_path)
        # Download and write the files
        if feed['poster']:
            poster_res = requests.get(feed['poster'], headers=headers, verify=False)
            with open(os.path.join(feed_path, '{feed_name}.{suffix}'.format(feed_name=feed['caption'], suffix=feed['poster'].split('.')[-1])), 'wb') as f:
                f.write(poster_res.content)
        if feed['playUrl']:
            video_res = requests.get(feed['playUrl'], headers=headers, verify=False)
            with open(os.path.join(feed_path, '{feed_name}.{suffix}'.format(feed_name=feed['caption'], suffix=feed['playUrl'].split('.')[-1])), 'wb') as f:
                f.write(video_res.content)
        elif feed['imgUrls']:
            i = 1
            for img in feed['imgUrls']:
                img_res = requests.get(img, headers=headers, verify=False)
                try:
                    with open(os.path.join(feed_path, '{num}-{feed_name}.{suffix}'.format(num=str(i), feed_name=feed['caption'], suffix=img.split('.')[-1])), 'wb') as f:
                        f.write(img_res.content)
                except FileNotFoundError:
                    pass
                i += 1
        print('({feed_num}): {feed_name} --- done!'.format(feed_num=feed_num, feed_name=feed['caption']))
        feed_num += 1
    print('({user_num}) {user_id} --- finished!'.format(user_num=user_num, user_id=USER_ID))
    user_num += 1
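One thing the script above does not use is pagination: it asks for count=999 in a single request, even though the RecoFeeds query both sends and returns a pcursor. If a user has more feeds than one response carries, something like the following could page through them. This is only a sketch: it reuses the headers dict and the user_public_feeds_query_string query text defined above, and the stop condition (an empty page or an exhausted cursor such as 'no_more') is an assumption about the endpoint, not documented behaviour.

def fetch_all_public_feeds(principal_id, page_size=100):
    # Page through publicFeeds with pcursor instead of one count=999 request.
    # Reuses `headers` and `user_public_feeds_query_string` from the script above.
    feeds = []
    pcursor = ''
    while True:
        query = dict(user_public_feeds_query_string)
        query['variables'] = {'principalId': principal_id, 'pcursor': pcursor, 'count': page_size}
        r = requests.post('https://live.kuaishou.com/graphql', headers=headers, json=query, verify=False)
        data = json.loads(r.content.decode('utf-8'))['data']['publicFeeds']
        feeds += data['list']
        pcursor = data.get('pcursor') or ''
        # Assumed stop condition: empty page or exhausted cursor
        if not data['list'] or pcursor in ('', 'no_more'):
            return feeds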
Look up a user's eid from the numeric user id; the eid can then be used in the download script above.
# Repeated here so this lookup snippet can run on its own
import json
import requests

headers = {
    'Proxy-Connection': 'keep-alive',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'Referer': 'https://live.kuaishou.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
}
num = 1
authors_info_list = []
key_words = 3848980  # numeric user id (not the Kuaishou ID)
while True:
    authors_info_query_string = {
        "operationName": "SearchDetailQuery",
        "variables": {"key": str(key_words), "type": "author", "page": num, "lssid": None, "ussid": None},
        "query": "query SearchDetailQuery($key: String, $type: String, $page: Int, $lssid: String, $ussid: String) {\n searchDetail(key: $key, type: $type, page: $page, lssid: $lssid, ussid: $ussid) {\n ... on SearchCategoryList {\n type\n list {\n id\n categoryId\n title\n src\n roomNumber\n __typename\n }\n __typename\n }\n ... on SearchUserList {\n type\n ussid\n list {\n id\n name\n living\n profile\n sex\n description\n countsInfo {\n fan\n follow\n photo\n __typename\n }\n __typename\n }\n __typename\n }\n ... on SearchLivestreamList {\n type\n lssid\n list {\n user {\n id\n profile\n name\n __typename\n }\n watchingCount\n src\n title\n gameId\n gameName\n categoryId\n liveStreamId\n playUrls {\n quality\n url\n __typename\n }\n quality\n gameInfo {\n category\n name\n pubgSurvival\n type\n kingHero\n __typename\n }\n redPack\n liveGuess\n expTag\n __typename\n }\n __typename\n }\n __typename\n }\n}\n"}
    r_graphql = requests.post('https://live.kuaishou.com/graphql', headers=headers, json=authors_info_query_string, verify=False)
    try:
        authors_info = json.loads(r_graphql.content.decode(encoding='utf-8', errors='strict'))['data']['searchDetail']
        authors_info_list += authors_info['list']
        print(authors_info['list'])  # the 'id' field of each entry is the user's eid
        # Fewer than 15 results on a page means this was the last page
        if len(authors_info['list']) < 15:
            print(authors_info_list)
            break
        else:
            num += 1
    except json.decoder.JSONDecodeError:
        # Non-JSON response: skip it and retry the same page
        pass
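The id field of each entry in authors_info_list is the eid, and those values can be dropped straight into USER_IDS in the download script at the top of this post. A small usage sketch:

# Collect the eids from the search results and hand them to the downloader above
eids = [author['id'] for author in authors_info_list]
print(eids)        # eid values look like '3xucs5xtvau5jdg' (see the sample IDs near the top)
# USER_IDS = eids  # then re-run the download script with these eids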