Scraping Kuaishou videos with Python

This post describes a way to scrape the video list from a user's profile page on the Kuaishou platform with Python. It explains how to build each video's page URL, then how to extract the playback address from that page and download the video. The page content is pulled apart with regular expressions and JSON handling.


Getting the video list from a user's profile page

Convert each video into its video-page URL and save the URLs to a file.

https://live.kuaishou.com/profile/XXXXXXX is the user's profile page.

import requests
import re
import json

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    "Cookie": "55kuaishou.live.bfb1s=9b8f70844293bed778aade6e0a8f9942; clientid=3; did=web_e443327b4b4949f5887c62c6deaf03a9; client_key=65890b29; didv=1578472661000; userId=165336717; userId=165336717; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAWPr5FbsICKzDZ0kZws3AUW3yUrsS2o0J2E-ObC1HJYv6YoJFua0LzwEBSJxm7yZqUbZEx0XuvKeuoznP3vxy9cLUMGQz28flV0_HXi0ZXYsIRIx1KWvKzML7ViSbuaG5lPhujOykWu17XDL3_AVP6w7rOaPl4XXzp8D1EHdzNU3LtXQTAoifGqUmMFcYU21eE-VfjC2FYiSFG0sYTBnJ3UaEo_d-PiuxE4duU2DjxXdbB5BSiIgpP7OcoI9t9q4JKF4inMtRFIY4ztGxK1hzFhjCLhZ6REoBTAB; kuaishou.live.web_ph=b0fee16b1015d885056bfe536708a2753d4f"
}

response = requests.get("https://live.kuaishou.com/profile/yuhaiying11", headers=headers)

# The page embeds its initial state as a JSON blob between "__=" and ";(function"
match = re.search(r'__=(.*?);\(function', response.text)
data = json.loads(match.group(1))

# The video feed sits inside the GraphQL client cache; serialize it back to a
# string so the feed array can be cut out with a regex
graphql_state = json.dumps(data['clients']['graphqlServerClient'])
match0 = re.search(r'\[{(.*?)}\]', graphql_state)
array = json.loads(match0.group())

print(array[0])

with open('a.txt', 'w') as f:
    for obj in array:
        video_id = obj['id'].replace('VideoFeed:', '')
        f.write('https://live.kuaishou.com/u/yuhaiying11/' + video_id + '?did=web_e443327b4b4949f5887c62c6deaf03a9\n')

This produces a.txt, containing the video-page URL of every video initially shown on the profile page.

Getting the video-page URLs that load as you scroll down the profile page

Some of the parameters below have to be copied manually from the browser.

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    "Cookie": "55kuaishou.live.bfb1s=9b8f70844293bed778aade6e0a8f9942; clientid=3; did=web_e443327b4b4949f5887c62c6deaf03a9; client_key=65890b29; didv=1578472661000; userId=165336717; userId=165336717; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAWPr5FbsICKzDZ0kZws3AUW3yUrsS2o0J2E-ObC1HJYv6YoJFua0LzwEBSJxm7yZqUbZEx0XuvKeuoznP3vxy9cLUMGQz28flV0_HXi0ZXYsIRIx1KWvKzML7ViSbuaG5lPhujOykWu17XDL3_AVP6w7rOaPl4XXzp8D1EHdzNU3LtXQTAoifGqUmMFcYU21eE-VfjC2FYiSFG0sYTBnJ3UaEo_d-PiuxE4duU2DjxXdbB5BSiIgpP7OcoI9t9q4JKF4inMtRFIY4ztGxK1hzFhjCLhZ6REoBTAB; kuaishou.live.web_ph=b0fee16b1015d885056bfe536708a2753d4f",
    "Content-Type": "application/json",
    "accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
}

# principalId and pcursor below were copied from the browser
json0 = {
    "operationName": "publicFeedsQuery",
    "variables": {
        "principalId": "yuhaiying11",
        "pcursor": "1.552892860461E12",
        "count": 500
    },
    "query": "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {  publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {    pcursor    live {      user {        id        avatar        name        __typename      }      watchingCount      poster      coverUrl      caption      id      playUrls {        quality        url        __typename      }      quality      gameInfo {        category        name        pubgSurvival        type        kingHero        __typename      }      hasRedPack      liveGuess      expTag      __typename    }    list {      id      thumbnailUrl      poster      workType      type      useVideoPlayer      imgUrls      imgSizes      magicFace      musicName      caption      location      liked      onlyFollowerCanComment      relativeHeight      timestamp      width      height      counts {        displayView        displayLike        displayComment        __typename      }      user {        id        eid        name        avatar        __typename      }      expTag      __typename    }    __typename  }}"
}

response = requests.post("https://live.kuaishou.com/m_graphql", json=json0, headers=headers)
data = response.json()
feed_list = data['data']['publicFeeds']['list']

with open('b.txt', 'w') as f:
    for obj in feed_list:
        f.write('https://live.kuaishou.com/u/yuhaiying11/' + obj['id'] + '?did=web_e443327b4b4949f5887c62c6deaf03a9\n')

This request is paginated, with the page size (count) set to 500.

To fetch the second page, take the pcursor value returned by the first request and send it as the pcursor parameter of the next request; that returns the next 500 items.
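A minimal sketch of that pagination loop, reusing `headers` and `json0` from the script above. The empty initial pcursor and the "no_more" end-of-feed sentinel are assumptions here, worth verifying against a real response:

```python
# Hypothetical pagination loop; assumes an empty pcursor requests the first
# page and the server returns "no_more" (or nothing) when the feed ends.
pcursor = ""
all_items = []
while True:
    json0["variables"]["pcursor"] = pcursor
    resp = requests.post("https://live.kuaishou.com/m_graphql", json=json0, headers=headers)
    feeds = resp.json()['data']['publicFeeds']
    all_items.extend(feeds['list'])
    pcursor = feeds.get('pcursor')
    if not pcursor or pcursor == "no_more":  # assumed end-of-feed sentinel
        break

print(len(all_items), 'videos collected')
```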

This produces the b.txt file.


Now read the two files (a.txt and b.txt), extract the playback URL from each video page, and download the videos.

import requests
import re

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    "Cookie": "55kuaishou.live.bfb1s=ac5f27b3b62895859c4c1622f49856a4; clientid=3; did=web_8fb1ca7b6de24f8cbfd3fb1994bf3a77; client_key=65890b29; didv=1578653936000; userId=165336717; userId=165336717; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgARTvvKnBFu2hJful3C6EOswrlYnEo1vhjljvRsrSFfn-mZ4qABpG9zVeu1LU3TJYbECqVeKBjktAfIkG71mkFF9zEkGA_tqLqB97uI7fySGeRJcxH7gYHVX8eQKO5JJQb2LbgSv3KAlQkJkfZnq6_K_XAvfkBHPCKTSj9dOrZv2XxHgJQTT2DSmQJztLDJqwNjssx25sEbSkgzz0Zt2rOiIaEqoO82cRG00nqSoI30_iGx2JSCIgODuEtIDjb8f2J5K1FmGOsuomQWw0V5nY9LY9NfKqAGgoBTAB; kuaishou.live.web_ph=c10764d1d4010ff90cdd4682472a327edf35"
}

def get_play_url(page_url):
    # e.g. "https://live.kuaishou.com/u/yuhaiying11/3xj2txve5ntjn8e?did=web_e443327b4b4949f5887c62c6deaf03a9"
    response = requests.get(page_url, headers=headers)
    # The play URL is embedded in the page source as an escaped JSON string
    match = re.search(r'"playUrl":"(http:.*?\.mp4)', response.text)
    url = match.group(1)
    # Decode \uXXXX escapes (e.g. \u002F) back into plain characters
    return url.encode('utf-8').decode('unicode_escape')

for filename in ('a.txt', 'b.txt'):
    with open(filename) as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            play_url = get_play_url(line)
            print(play_url)
            video_id = line.replace('https://live.kuaishou.com/u/yuhaiying11/', '').replace('?did=web_e443327b4b4949f5887c62c6deaf03a9', '')
            print(video_id)
            r = requests.get(play_url)
            path = 'C:\\Users\\Administrator\\Desktop\\pyks\\file\\' + video_id + '.mp4'
            with open(path, 'wb') as out:
                out.write(r.content)
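One caveat: `r.content` buffers the whole video in memory before writing it out. For long videos a streamed download is safer. A minimal sketch (the chunk size is an arbitrary choice):

```python
def download(play_url, path, chunk_size=256 * 1024):
    # Stream the response body to disk instead of buffering it all in memory
    with requests.get(play_url, stream=True) as r:
        r.raise_for_status()
        with open(path, 'wb') as out:
            for chunk in r.iter_content(chunk_size=chunk_size):
                out.write(chunk)
```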

### Scraping Kuaishou videos with a Python crawler

#### Install the required libraries

To do this you first need a few Python libraries. `requests` sends HTTP requests; `lxml` and `BeautifulSoup` help parse HTML documents; `re` is the standard-library regular-expression module.

```bash
pip install requests lxml beautifulsoup4
```

#### Fetch the page content

`requests.get()` sends a GET request to the given URL and returns a response object. The returned body can then be read as bytes through the `.content` attribute[^3].

```python
import requests

url = "https://www.kuaishou.com/some_video_page"
response = requests.get(url)
html_content = response.content.decode('utf-8')
```

#### Parse the page structure

For most sites, downloading the whole page is not the end goal. It is more effective to locate specific tags or attribute values in the source and extract just the useful information. Two common tools for this are XPath expressions and CSS selectors:

- XPath can pinpoint nodes precisely;
- BeautifulSoup combined with CSS selector syntax is more readable.

The examples below show how to use each of them to pull out video links and other metadata (title, description, and so on).

##### Parsing with XPath

```python
from lxml import etree

tree = etree.HTML(html_content)
video_urls_xpath = tree.xpath('//a[contains(@class, "video-link")]/@href')  # assuming this is where video URLs are stored
titles_xpath = tree.xpath('//div[@id="title"]/text()')

print(f"Found {len(video_urls_xpath)} videos.")
for i in range(len(titles_xpath)):
    print(f"{i+1}. Title: {titles_xpath[i]}, URL: https://{video_urls_xpath[i]}")
```

##### Parsing with BeautifulSoup

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup(html_content, 'lxml')
videos_bs = soup.select('.video-item a.video-link')  # again assuming <a> tags with class "video-link" point to video pages
descriptions_bs = [item.text.strip() for item in soup.find_all(id='description')]

for idx, (link, desc) in enumerate(zip(videos_bs, descriptions_bs), start=1):
    href = link['href']
    title = link.string or '<No Title>'
    print(f"\nVideo #{idx}")
    print(f"- Link: {href}\n- Description:\n\t{desc[:70] + '...'}\n- Title: {title}")
```

Note that real scraping work usually involves further details, such as login/authentication and dynamically loaded content. Also make sure to follow each platform's terms of service and applicable laws, and crawl responsibly.
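On the login point: as the scripts earlier in this post show, pages behind authentication can often be fetched by replaying the browser's Cookie header. A minimal sketch with `requests.Session`; the cookie string and profile URL are placeholders you would copy from your own browser's DevTools:

```python
import requests

session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    # Placeholder cookie -- copy the real value from your browser
    "Cookie": "did=web_xxxxxxxxxxxxxxxx; userId=xxxxxxxxx",
})

response = session.get("https://live.kuaishou.com/profile/some_user")
print(response.status_code)
```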