最新PYTHON批量下载快手个人主页短视频代码（带翻页）

爱吃果冻的小哥哥
已于 2023-09-21 10:40:58 修改
阅读量3.6k
点赞数
分类专栏：抖音 | 文章标签：python
于 2022-10-19 08:56:13 首次发布
本文链接：https://blog.csdn.net/guodongg/article/details/127400619
版权
抖音专栏收录该内容
3 篇文章 4 订阅
订阅专栏
import os
import time
import requests
import json
class KsStd(object):
    """Download the public short videos of a Kuaishou profile page.

    The class talks to the Kuaishou GraphQL endpoint (``ks_base_url``) with
    the headers in ``ksHeaders``, creates one folder per user below ``sdir``
    and stores every sufficiently long video there together with a small
    ``.txt`` description file.

    Typical use::

        KsStd().req_monitor("https://www.kuaishou.com/profile/<user id>")
    """

    ksHeaders = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                      ' Chrome/98.0.4758.82 Safari/537.36',
        'content-type': 'application/json',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Cookie': "Cookie"
    }
    # A cookie taken from a logged-in session is required to fetch the pages
    # after the first one; replace the 'Cookie' placeholder above with it.
    ks_base_url = "https://www.kuaishou.com/graphql"
    # Root download folder. Backslashes are escaped explicitly: the previous
    # spelling relied on invalid escape sequences ("\新", "\快"), which newer
    # Python versions warn about. The runtime value is unchanged.
    sdir = "G:\\新建文件夹\\快手下载\\"
    # Seconds to wait after each page request; shorter waits trigger a captcha.
    page_delay = 20
    # Timeout (seconds) applied to every HTTP request so a stalled connection
    # cannot hang the script forever.
    request_timeout = 30
    # Videos shorter than this many milliseconds are not downloaded.
    min_duration_ms = 40000
    # Characters that cannot appear in a Windows file name, mapped to "_".
    _unsafe_chars = str.maketrans(dict.fromkeys('\\/:*?"<>|\r\n\t', "_"))

    def __init__(self):
        # Target folder of the current user; set by req_monitor().
        self.userdir = None

    @staticmethod
    def _extract_user_id(profile_url):
        """Return the last path segment of *profile_url*, ignoring query, fragment and trailing slashes."""
        path = profile_url.split('#', 1)[0].split('?', 1)[0]
        return path.rstrip('/').split('/')[-1]

    @classmethod
    def _safe_name(cls, name):
        """Return *name* with characters that are illegal in file names replaced by ``_``."""
        return str(name).translate(cls._unsafe_chars)

    def _post_graphql(self, payload, ks_headers):
        """POST *payload* as JSON to the GraphQL endpoint and return the decoded response.

        Raises ``requests.HTTPError`` on a non-2xx status instead of trying to
        decode an error page.
        """
        response = requests.post(
            url=self.ks_base_url,
            headers=ks_headers,
            data=json.dumps(payload),
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return response.json()

    def header_data_build(self, meg_param=None):
        """Build the request headers shared by all API calls.

        :param meg_param: optional mapping of extra headers (e.g. ``Referer``).
            It is copied, never modified.
        :return: a new dict holding *meg_param* updated with ``ksHeaders``
            (``ksHeaders`` wins on duplicate keys).
        """
        # A fresh dict per call: no shared mutable default, no mutation of
        # the caller's mapping.
        headers = dict(meg_param) if meg_param else {}
        headers.update(self.ksHeaders)
        return headers

    def req_monitor(self, profile_url):
        """Download every eligible video of the profile at *profile_url*.

        :param profile_url: profile page URL whose last path segment is the
            user id.
        :raises ValueError: if no user id can be extracted from the URL.
        """
        ks_user_id = self._extract_user_id(profile_url)
        if not ks_user_id:
            raise ValueError("cannot extract a user id from profile url: %r" % (profile_url,))
        ks_headers = self.header_data_build({'Referer': profile_url})
        # Profile ("upper") information; also creates the per-user folder.
        self.userdir = self.ks_upper_api_data(ks_user_id, ks_headers)
        # Video listing and download.
        self.handle_video_info(ks_user_id, ks_headers)

    def ks_upper_api_data(self, ks_user_id, ks_headers):
        """Query the ``visionProfile`` operation and prepare the user's folder.

        :param ks_user_id: user id taken from the profile URL.
        :param ks_headers: request headers from :meth:`header_data_build`.
        :return: path of the user's download folder (see :meth:`upper_info_detail`).
        :raises RuntimeError: if the response carries no ``userProfile``.
        """
        ks_upper_data = {
            "operationName": "visionProfile",
            "variables": {
                "userId": ks_user_id
            },
            "query": "query visionProfile($userId: String) {\n  visionProfile(userId: $userId) {\n"
                     "    result\n    hostName\n    userProfile {\n      ownerCount {\n        "
                     "fan\n        photo\n        follow\n        photo_public\n        __"
                     "typename\n      }\n      profile {\n        gender\n        user_name\n        "
                     "user_id\n        headurl\n        user_text\n        user_profile_bg_url\n        "
                     "__typename\n      }\n      isFollowing\n      __typename\n    }\n    __typename\n  }\n}\n"
        }
        upper_data = self._post_graphql(ks_upper_data, ks_headers)
        vision_profile = (upper_data.get('data') or {}).get('visionProfile') or {}
        user_profile = vision_profile.get('userProfile')
        if not user_profile:
            # Fail with a readable message instead of a TypeError further down.
            raise RuntimeError(
                "no userProfile returned for user %s; check the Cookie header" % ks_user_id
            )
        return self.upper_info_detail(user_profile)

    def upper_info_detail(self, user_data):
        """Print a summary of the user and make sure their download folder exists.

        :param user_data: the ``userProfile`` object of the API response; its
            ``profile`` and ``ownerCount`` entries are read.
        :return: path of the user's folder below ``sdir``.
        """
        profile = user_data['profile']
        counts = user_data['ownerCount']
        upper_infos = {
            'nickname': profile['user_name'],
            'signature': profile['user_text'],
            'uid': profile['user_id'],
            'video_count': counts['photo_public'],
            'avatar': profile['headurl'],
            'attention': counts['follow'],
            'fans': counts['fan'],
            'create_time': int(time.time()),
        }
        print("用户信息", upper_infos)
        # Nicknames are free text, so strip characters a file system rejects.
        userdir = os.path.join(self.sdir, self._safe_name(profile['user_name']))
        # makedirs also creates a missing parent folder; exist_ok avoids the
        # check-then-create race of exists() + mkdir().
        os.makedirs(userdir, exist_ok=True)
        return userdir

    def ks_video_api_data(self, ks_user_id, ks_headers, pcursor_index=""):
        """Fetch one page of the user's video list (``visionProfilePhotoList``).

        :param ks_user_id: user id taken from the profile URL.
        :param ks_headers: request headers from :meth:`header_data_build`.
        :param pcursor_index: page cursor; ``""`` requests the first page.
        :return: the ``visionProfilePhotoList`` object of the response, or
            ``None`` when the response does not contain one.
        """
        ks_video_data = {
            "operationName": "visionProfilePhotoList",
            "variables": {
                "userId": ks_user_id,
                "pcursor": pcursor_index,
                "page": "profile"
            },
            "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  "
                     "caption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  "
                     "photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  "
                     "coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  "
                     "animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  "
                     "profileUserTopPhoto\n  __typename\n}\n\nfragment feedContent on Feed {\n  "
                     "type\n  author {\n    id\n    name\n    headerUrl\n    following\n    "
                     "headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  "
                     "photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  "
                     "llsid\n  status\n  currentPcursor\n  __typename\n}\n\nquery "
                     "visionProfilePhotoList($pcursor: String, $userId: String, $page: String, "
                     "$webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: "
                     "$userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    "
                     "webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    "
                     "hostName\n    pcursor\n    __typename\n  }\n}\n"
        }
        video_data = self._post_graphql(ks_video_data, ks_headers)
        return (video_data.get('data') or {}).get('visionProfilePhotoList')

    def handle_video_save(self, video_datas):
        """Download the videos of one result page into ``self.userdir``.

        Each video is stored as ``<YYYY-MM-DD>.mp4`` (date of publication),
        accompanied by ``<YYYY-MM-DD>｜<title>.txt`` holding caption, duration
        and source URL. Videos shorter than ``min_duration_ms`` and videos
        whose target file already exists are skipped. A failed download is
        reported and does not abort the remaining items.

        NOTE: the file name is derived from the date only, so at most one
        video per calendar day is kept.

        :param video_datas: list of feed items, each with a ``photo`` mapping.
        """
        num = len(video_datas)
        for i, videoItem in enumerate(video_datas, start=1):
            photo = videoItem['photo']
            desc = photo['caption'] or ""
            words = desc.split()
            # An empty caption used to raise IndexError; fall back to the id.
            title = words[0] if words else str(photo.get('id') or "untitled")
            video_url = photo['photoUrl']
            duration = int(photo['duration'])
            # The API timestamp is in milliseconds.
            addtime = time.strftime("%Y-%m-%d", time.localtime(photo['timestamp'] / 1000))
            dtime = duration / 60000
            video_name = os.path.join(self.userdir, "%s.mp4" % addtime)

            if duration < self.min_duration_ms:
                # NOTE(review): the message says "1 minute" while the
                # threshold is 40 s — confirm which one is intended.
                print("正在下载第%s/%s个：%s分钟，长度小于1分钟，放弃下载" % (i, num, dtime))
                continue
            if os.path.isfile(video_name):
                # Previously reported with the misleading "too short" text.
                print("第%s/%s个：%s 已存在，跳过下载" % (i, num, video_name))
                continue

            print("正在下载第%s/%s个：%s" % (i, num, title))
            try:
                video = self.get_video(video_url)
            except requests.RequestException as exc:
                # Nothing is written, so a later run retries this video.
                print("第%s/%s个下载失败：%s" % (i, num, exc))
                continue
            with open(video_name, 'wb') as f:
                f.write(video)
            # The title is free text and must be made safe for a file name.
            file_info = os.path.join(self.userdir, "%s｜%s.txt" % (addtime, self._safe_name(title)))
            info = "title:%s \n duration:%s分钟 \n addtime:%s \n video_url:%s \n " % (desc, dtime, addtime, video_url)
            with open(file_info, 'w', encoding='utf-8') as f:
                f.write(info)

    def get_video(self, video_url):
        """Download *video_url* and return the response body as bytes.

        :raises requests.HTTPError: on a non-2xx response, so that an error
            page is never saved as a video file.
        """
        v_header = {
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
            'Authority': 'v2.kwaicdn.com',
            'Scheme': 'https',
            'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
        }
        response = requests.get(url=video_url, headers=v_header, timeout=self.request_timeout)
        response.raise_for_status()
        return response.content

    def handle_video_info(self, ks_user_id, ks_headers):
        """Walk through all result pages of the user and save their videos.

        Paging follows the ``pcursor`` value of each response and ends when it
        equals ``'no_more'``. The loop also stops when the response is empty
        or the cursor is missing or does not advance, which would otherwise
        request the same page forever.

        :param ks_user_id: user id taken from the profile URL.
        :param ks_headers: request headers from :meth:`header_data_build`.
        """
        page_index = ""
        page_no = 0
        while page_index != 'no_more':
            page_no += 1
            print("page:%s" % page_no)
            video_data = self.ks_video_api_data(ks_user_id, ks_headers, page_index)
            if not video_data:
                print("第%s页没有返回数据，停止翻页（请检查Cookie）" % page_no)
                break
            feeds = video_data.get('feeds') or []
            next_cursor = video_data.get('pcursor')
            time.sleep(self.page_delay)  # shorter waits trigger a captcha
            self.handle_video_save(feeds)
            if not next_cursor or next_cursor == page_index:
                print("第%s页没有返回新的翻页游标，停止翻页" % page_no)
                break
            page_index = next_cursor




if __name__ == "__main__":
    # Profile pages to mirror; they are processed one after another.
    profile_urls = (
        "https://www.kuaishou.com/profile/3xixde2qmm89hzq",
        "https://www.kuaishou.com/profile/3xhwuat7zvnijd9",
        "https://www.kuaishou.com/profile/3xhyg9xhp9hxyhg",
        "https://www.kuaishou.com/profile/3x9r2wjh7cb2f8k",
        "https://www.kuaishou.com/profile/3xiuiw88ywkmjqq",
    )
    for target in profile_urls:
        # A fresh downloader instance is created for every profile.
        KsStd().req_monitor(target)