前言
本文不涉及逆向破解,介绍的是通过PC端接口采集快手视频的方法。
PC端的接口比较容易获取:只需登录快手账号,再使用抓包工具,即可抓到接口。
爬取时需要携带cookie,但只需其中一个参数:did=web_xxxxxxxxxxxxxxxx。获取方式很简单:随便复制一个短链接,在浏览器中打开,然后复制cookie中的did=web_xxxxxxxxxxxxxxxx即可。
直接上代码!
from lxml import etree
import re,requests,json,time
class KuaiShou(object):
def __init__(self,collect_urls_list):
self.collect_urls_list = collect_urls_list
# cookie需要自行获取,目前测试有效期至少4天,不需更换。
self.cookies = 'did=xxxxxxxxxx;',
# 大量爬取需要加代理
self.proxies = ''
for i in range(len(self.collect_urls_list)):
url = self.getKuaiShouRealAddress(self.collect_urls_list[i])
if not url:continue
print(url)
# 单个视频url
if re.search('/fw/photo/', url):
print('单个视频')
self.requestsSingleVideo(url)
# 用户视频列表
if re.search('/fw/user/',url):
print('用户作品')
user_id = re.findall("/fw/user/(.*)\?",url)[0]
fid = re.findall('fid=(\d+)',url)[0]
share_id = re.findall('shareId=(\d+)',url)[0]
self.requestsUserVideo(user_id,fid,share_id)
def getKuaiShouRealAddress(self, url, timeout=10):
    """Resolve a share link to the URL it redirects to.

    Sends one GET request without following redirects and reads the
    ``Location`` header of the response.

    Args:
        url: the share link to resolve.
        timeout: seconds to wait for the server before ``requests`` raises
            a timeout error; without a timeout the call can block
            indefinitely.

    Returns:
        The redirect target, or ``None`` when the response is not a
        redirect or carries no ``Location`` header.

    Raises:
        requests.RequestException: propagated unchanged on connection
            failures or timeouts.
    """
    headers = {
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Linux; Android 4.3; Nexus 7 Build/JSS15Q) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.72 Safari/537.36'
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    # Kept as in the original; confirm it is really required.
    res = requests.get(
        url,
        headers=headers,
        allow_redirects=False,
        verify=False,
        timeout=timeout,
    )
    # Accept every HTTP redirect status, not only 302.
    if res.status_code in (301, 302, 303, 307, 308):
        return res.headers.get('Location')
    return None
def requestsSingleVideo(self,url):
long_url = url
try:
headers2 = {
'Host': 'npstianjin.s.kuaishou