python 爬取 kuaishou视频

获取用户首页的视频列表

转为每个视频首页的url 保存到文件

https://live.kuaishou.com/profile/XXXXXXX 是用户首页

import requests
import re
import json

# Scrape the video list embedded in a Kuaishou user's profile page and write
# one video-page URL per line to a.txt.
# NOTE(review): the Cookie is session-specific and must be refreshed manually
# from the browser once it expires.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    "Cookie": "55kuaishou.live.bfb1s=9b8f70844293bed778aade6e0a8f9942; clientid=3; did=web_e443327b4b4949f5887c62c6deaf03a9; client_key=65890b29; didv=1578472661000; userId=165336717; userId=165336717; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAWPr5FbsICKzDZ0kZws3AUW3yUrsS2o0J2E-ObC1HJYv6YoJFua0LzwEBSJxm7yZqUbZEx0XuvKeuoznP3vxy9cLUMGQz28flV0_HXi0ZXYsIRIx1KWvKzML7ViSbuaG5lPhujOykWu17XDL3_AVP6w7rOaPl4XXzp8D1EHdzNU3LtXQTAoifGqUmMFcYU21eE-VfjC2FYiSFG0sYTBnJ3UaEo_d-PiuxE4duU2DjxXdbB5BSiIgpP7OcoI9t9q4JKF4inMtRFIY4ztGxK1hzFhjCLhZ6REoBTAB; kuaishou.live.web_ph=b0fee16b1015d885056bfe536708a2753d4f",
}


response = requests.get("https://live.kuaishou.com/profile/yuhaiying11", headers=headers)

# The profile page embeds its initial state as JSON between "__=" and
# ";(function" inside an inline <script>; capture just the JSON part.
state_match = re.search(r'__=(.*?);\(function', response.text)
if state_match is None:
    raise RuntimeError("initial-state JSON not found; page layout may have changed or cookies expired")
data = json.loads(state_match.group(1))

# The video entries sit inside the serialized GraphQL client cache as the
# first JSON array ("[{...}]").
graphql_cache = json.dumps(data['clients']['graphqlServerClient'])
array_match = re.search(r'\[{(.*?)}\]', graphql_cache)
if array_match is None:
    raise RuntimeError("video feed array not found in GraphQL cache")
videos = json.loads(array_match.group())

print(videos[0])

# Entry ids look like "VideoFeed:<id>"; strip the prefix to build page URLs.
with open('a.txt', 'w') as f:
    for obj in videos:
        video_id = obj['id'].replace('VideoFeed:', "")
        f.write('https://live.kuaishou.com/u/yuhaiying11/' + video_id
                + '?did=web_e443327b4b4949f5887c62c6deaf03a9\r\n')

获得a.txt 首页里每个视频首页的url

获得用户首页下拉时 页面里 增加的视频 首页url

这里部分参数,需要手动在浏览器中获取

import requests
import re
import json

# Fetch one page of a user's public feed via Kuaishou's GraphQL endpoint and
# write one video-page URL per line to b.txt.
# NOTE(review): the Cookie is session-specific and must be refreshed manually
# from the browser once it expires.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    "Cookie": "55kuaishou.live.bfb1s=9b8f70844293bed778aade6e0a8f9942; clientid=3; did=web_e443327b4b4949f5887c62c6deaf03a9; client_key=65890b29; didv=1578472661000; userId=165336717; userId=165336717; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAWPr5FbsICKzDZ0kZws3AUW3yUrsS2o0J2E-ObC1HJYv6YoJFua0LzwEBSJxm7yZqUbZEx0XuvKeuoznP3vxy9cLUMGQz28flV0_HXi0ZXYsIRIx1KWvKzML7ViSbuaG5lPhujOykWu17XDL3_AVP6w7rOaPl4XXzp8D1EHdzNU3LtXQTAoifGqUmMFcYU21eE-VfjC2FYiSFG0sYTBnJ3UaEo_d-PiuxE4duU2DjxXdbB5BSiIgpP7OcoI9t9q4JKF4inMtRFIY4ztGxK1hzFhjCLhZ6REoBTAB; kuaishou.live.web_ph=b0fee16b1015d885056bfe536708a2753d4f",
    "Content-Type": "application/json",
    "accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}


# GraphQL request body. "pcursor" selects the page: pass the pcursor value
# returned by the previous response to fetch the next page of `count` items.
json0 = {
    "operationName": "publicFeedsQuery",
    "variables": {
        "principalId": "yuhaiying11",
        "pcursor": "1.552892860461E12",
        "count": 500
    },
    "query": "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {  publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {    pcursor    live {      user {        id        avatar        name        __typename      }      watchingCount      poster      coverUrl      caption      id      playUrls {        quality        url        __typename      }      quality      gameInfo {        category        name        pubgSurvival        type        kingHero        __typename      }      hasRedPack      liveGuess      expTag      __typename    }    list {      id      thumbnailUrl      poster      workType      type      useVideoPlayer      imgUrls      imgSizes      magicFace      musicName      caption      location      liked      onlyFollowerCanComment      relativeHeight      timestamp      width      height      counts {        displayView        displayLike        displayComment        __typename      }      user {        id        eid        name        avatar        __typename      }      expTag      __typename    }    __typename  }}"
}


response = requests.post("https://live.kuaishou.com/m_graphql", json=json0, headers=headers)
response.encoding = 'utf-8'
# BUG FIX: json.loads() no longer accepts an `encoding` keyword (removed in
# Python 3.9) — decoding is handled by `response.encoding` above.
array = json.loads(response.text)
feed_list = array['data']['publicFeeds']['list']

with open('b.txt', 'w') as f:
    for obj in feed_list:
        video_id = obj['id']
        f.write('https://live.kuaishou.com/u/yuhaiying11/' + video_id
                + '?did=web_e443327b4b4949f5887c62c6deaf03a9\r\n')

这里获取了分页,并把每页条数设置为500,

如果想获取第二页,需要请求第一页获得的 pcursor 参数 ,设置为第二页的请求参数,就可以请求第二页 的500条

 

 

 

获得b.txt 文件

 

然后读取这两个文件

在视频首页中获得播放url

然后下载

import requests
import re
import fileinput

def functionname(url_):
    """Fetch a Kuaishou video page and return its direct .mp4 play URL.

    Relies on the module-level ``headers`` dict for the request. The page
    embeds the URL as ``"playUrl":"http:...mp4"`` with \\uXXXX escapes, which
    are decoded before returning.

    Raises RuntimeError when no playUrl is found (e.g. expired cookies or a
    changed page layout) instead of crashing with AttributeError.
    """
    # e.g. "https://live.kuaishou.com/u/yuhaiying11/3xj2txve5ntjn8e?did=web_..."
    response = requests.get(url_, headers=headers)
    # Raw string, escaped ".mp4", and a capture group replace the old
    # group()/replace() dance on a non-raw pattern.
    match = re.search(r'"playUrl":"(http:.*?\.mp4)', response.text)
    if match is None:
        raise RuntimeError('playUrl not found in page: ' + url_)
    # Decode the embedded \uXXXX escapes into a plain URL.
    return match.group(1).encode('utf-8').decode('unicode_escape')


# Cookie/UA used for the video detail-page requests; refresh manually from
# the browser when the session expires.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
    "Cookie": "55kuaishou.live.bfb1s=ac5f27b3b62895859c4c1622f49856a4; clientid=3; did=web_8fb1ca7b6de24f8cbfd3fb1994bf3a77; client_key=65890b29; didv=1578653936000; userId=165336717; userId=165336717; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgARTvvKnBFu2hJful3C6EOswrlYnEo1vhjljvRsrSFfn-mZ4qABpG9zVeu1LU3TJYbECqVeKBjktAfIkG71mkFF9zEkGA_tqLqB97uI7fySGeRJcxH7gYHVX8eQKO5JJQb2LbgSv3KAlQkJkfZnq6_K_XAvfkBHPCKTSj9dOrZv2XxHgJQTT2DSmQJztLDJqwNjssx25sEbSkgzz0Zt2rOiIaEqoO82cRG00nqSoI30_iGx2JSCIgODuEtIDjb8f2J5K1FmGOsuomQWw0V5nY9LY9NfKqAGgoBTAB; kuaishou.live.web_ph=c10764d1d4010ff90cdd4682472a327edf35",
}


# Read each video-page URL from b.txt, resolve its direct .mp4 URL and
# download it into the local folder, named after the video id.
with open("b.txt") as url_file:
    for eachline in url_file:
        # Guard clause: skip blank lines instead of nesting the whole body.
        if eachline.isspace():
            continue
        play_url = functionname(eachline)
        print(play_url)
        # Derive the bare video id by stripping the fixed URL prefix/suffix.
        video_id = (eachline
                    .replace('https://live.kuaishou.com/u/yuhaiying11/', '')
                    .replace('?did=web_e443327b4b4949f5887c62c6deaf03a9', ''))
        print(video_id)
        r = requests.get(play_url)
        video_id = video_id.replace('\n', '')
        target = 'C:\\Users\\Administrator\\Desktop\\pyks\\file\\' + video_id + '.mp4'
        # Use a context manager so the handle is closed (the original leaked
        # one open file per download).
        with open(target, 'wb') as out:
            out.write(r.content)

由于快手没有开放API接口,所以我们只能通过模拟浏览器来爬取数据。 首先,我们需要安装selenium和chromedriver。selenium是一个自动化测试工具,可以模拟用户在浏览器中的操作。chromedriver是一个驱动程序,可以在代码中控制Chrome浏览器。 安装完selenium和chromedriver后,我们可以编写代码来模拟用户登录快手并获取收藏的视频。 代码如下: ```python from selenium import webdriver import time # 打开浏览器 driver = webdriver.Chrome() # 打开快手登录页面 driver.get('https://login.kuaishou.com/web/login') # 等待页面加载完成 time.sleep(5) # 输入账号密码 username = 'your_username' password = 'your_password' driver.find_element_by_name('username').send_keys(username) driver.find_element_by_name('password').send_keys(password) # 点击登录按钮 driver.find_element_by_class_name('login-button').click() # 等待登录成功 time.sleep(5) # 打开收藏页面 driver.get('https://live.kuaishou.com/profile/favorites') # 等待页面加载完成 time.sleep(5) # 获取所有视频的信息 videos = driver.find_elements_by_class_name('video-card') # 遍历所有视频并输出标题和链接 for video in videos: title = video.find_element_by_class_name('title').text link = video.find_element_by_tag_name('a').get_attribute('href') print(title, link) # 关闭浏览器 driver.quit() ``` 需要注意的是,由于快手页面一直在更新,上述代码在某些情况下可能无法正常运行。如果遇到问题,可以尝试手动更改代码中的元素定位方式。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值