Python爬虫爬取快手视频代码

import pprint
import requests
import os
import re
import json

def get_response(url, keywords, pcursor):
    """POST the visionSearchPhoto GraphQL query to Kuaishou and return the response.

    Parameters
    ----------
    url : str
        The Kuaishou GraphQL endpoint (https://www.kuaishou.com/graphql).
    keywords : str
        Search keyword to look up videos for.
    pcursor : str
        Pagination cursor; the caller passes the page index as a string.

    Returns
    -------
    requests.Response
        The raw HTTP response; callers parse it with ``.json()``.

    NOTE(review): the Cookie below is a captured session value and will
    expire — refresh it from the browser when requests start failing.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # Content-Length is intentionally omitted: requests computes it from
        # the body; a stale hard-coded value would corrupt the request.
        'Content-Type': 'application/json',
        'Cookie': 'kpf=PC_WEB; clientid=3; did=web_713774521487450db89fcfc3892aae65; didv=1705562481178; ktrace-context=1|MS43NjQ1ODM2OTgyODY2OTgyLjQzOTc2MzU1LjE3MDU1NjM4MDkxNTEuNzUzNzYy|MS43NjQ1ODM2OTgyODY2OTgyLjk2MjU0NDIxLjE3MDU1NjM4MDkxNTEuNzUzNzYz|0|graphql-server|webservice|false|NA; kpn=KUAISHOU_VISION',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/search/video?searchKey=%E6%85%A2%E6%91%87',
        'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    }
    # GraphQL document: three fragments plus the visionSearchPhoto query.
    # Fragment spreads must be spelled "..." (the pasted source had them
    # mangled into the single ellipsis character, which breaks the query).
    data = {
        "operationName": "visionSearchPhoto",
        "query": "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n searchSessionId\n pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n __typename\n }\n}\n",
        "variables": {"keyword": keywords, "pcursor": pcursor, "page": "search"},
    }
    response = requests.post(url=url, data=json.dumps(data), headers=headers)
    return response

def save(url, dir_name, pcursor):
    """Fetch one page of search results and download each video as an .mp4.

    Parameters
    ----------
    url : str
        The Kuaishou GraphQL endpoint, forwarded to :func:`get_response`.
    dir_name : str
        Search keyword; also used as the output directory name.
    pcursor : str
        Page cursor forwarded to :func:`get_response`.
    """
    response = get_response(url, dir_name, pcursor)
    json_data = response.json()
    # 'feeds' is a list of search-result entries.
    feed_list = json_data['data']['visionSearchPhoto']['feeds']
    # Create the output directory once, outside the loop; exist_ok avoids
    # the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(dir_name, exist_ok=True)
    for feeds in feed_list:
        try:
            video_url = feeds['photo']['photoUrl']
            title = feeds['photo']['caption']
            # Strip characters that are invalid in Windows filenames
            # (plus @ and # used in hashtags), then keep only the first
            # whitespace-separated word of the caption.
            new_title = re.sub(r'[\\/:*?"<>|@#]', '', title).split(' ')[0]
            print(video_url, new_title)
            mp4_data = requests.get(video_url).content
            with open(os.path.join(dir_name, new_title + '.mp4'), mode="wb") as f:
                f.write(mp4_data)
            print(new_title + '下载成功')
        except Exception as e:
            # Best-effort: skip entries with missing fields or failed
            # downloads, but report why instead of swallowing silently.
            print('下载失败', e)

# Script entry point: prompt for a keyword and a page count, then download
# every page. The pasted source had the dunders stripped ("name == 'main'")
# and iterated the characters of the page-count string instead of the page
# numbers — e.g. input "12" would fetch pages "1" and "2" only.
if __name__ == '__main__':
    url = "https://www.kuaishou.com/graphql"
    dir_name = input('请输入快手关键词')
    pcursor = input('请输入要下载的页数')
    for i in range(int(pcursor)):
        save(url, dir_name, str(i))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

易软科技(河源)有限公司

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值