php抓取喜马拉雅,Python爬取喜马拉雅免费相声音频V2

from requests import get

from lxml import etree

import os

# All downloads are written relative to the working directory, so create a
# local "Video" folder on first run (reuse it if it already exists) and switch
# into it. Unlike a bare ``except:``, this only tolerates the "already exists"
# case; any other failure (e.g. permissions) is reported.
os.makedirs("Video", exist_ok=True)
os.chdir("Video")

def open_url(url, timeout=10):
    """Fetch *url* with a desktop-browser User-Agent and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the script indefinitely.

    Returns:
        The response object produced by ``requests.get``. The HTTP status is
        not checked here; callers read ``.text``, ``.json()`` or ``.content``.
    """
    # A Chrome User-Agent is sent in place of the library default.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'}
    return get(url, headers=headers, timeout=timeout)

def get_xs(res):
    """Parse the album listing page into a numbered menu.

    Args:
        res: Response whose ``text`` attribute holds the listing page HTML.

    Returns:
        dict mapping a 1-based position to an ``(album name, album URL)``
        tuple, in the order the entries are matched on the page. Names and
        links are paired positionally, so the shorter of the two XPath
        results decides how many entries are produced.
    """
    base = 'https://www.ximalaya.com/xiangsheng'
    html = etree.HTML(res.text)
    names = html.xpath("//*[@class='general-album-list']/div[@class='content']/ul/li/div/a/span/text()")
    # The hrefs found on the page are appended to the fixed base URL.
    links = [
        base + path
        for path in html.xpath("//*[@class='general-album-list']/div[@class='content']/ul/li/div/a[1]/@href")
    ]
    return dict(enumerate(zip(names, links), start=1))

def get_Videourl(nm, hf, page):
    """Collect the play-API URL and title of every track on pages 1..page.

    Args:
        nm: Album name. Not used by this function; kept so existing callers
            keep working.
        hf: Album URL; ``p<N>`` is appended to address listing page N.
        page: Number of listing pages to walk, starting at page 1.

    Returns:
        ``(vdurl, names)`` where ``vdurl`` is a list of play-API URLs and
        ``names`` is a flat list of track titles, both in page order.
    """
    vdurl = []  # play-API URLs, one per track link found
    names = []  # track titles, flattened across all pages
    for page_no in range(1, page + 1):
        res = open_url(hf + f'p{page_no}')
        html = etree.HTML(res.text)
        hrefs = html.xpath('//*[@class="sound-list _Qp"]/ul/li/div[2]/a/@href')
        names.extend(html.xpath('//*[@class="sound-list _Qp"]/ul/li/div[2]/a/span/text()'))
        for href in hrefs:
            # The original sliced the last 9 characters of the link to get the
            # track id, which breaks for ids of any other length. This assumes
            # the id is the final path segment of the link -- TODO confirm
            # against the current page markup.
            track_id = href.rstrip('/').rsplit('/', 1)[-1]
            vdurl.append('https://www.ximalaya.com/revision/play/v1/audio?id=%s&ptype=1' % track_id)
    return vdurl, names

def get_Video(vdurl, nm):
    """Download each track in *vdurl* to ``<title>.m4a`` in the working directory.

    Args:
        vdurl: Play-API URLs; each is expected to answer with a JSON object.
        nm: Track titles. The i-th title names the file for the i-th URL; if
            the two lists differ in length the extra items are ignored.

    The JSON object is scanned for dict values carrying a non-empty ``src``
    entry, and that address is downloaded. Values that are not dicts (status
    codes, messages) and entries without ``src`` are skipped instead of
    raising.
    """
    # Replace characters that are not allowed in file names on common
    # filesystems so a title such as "a/b?" cannot make ``open`` fail.
    unsafe = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    for url, title in zip(vdurl, nm):
        payload = open_url(url).json()
        for value in payload.values():
            if not isinstance(value, dict) or not value.get('src'):
                continue
            video = open_url(value['src'])
            filename = f"{str(title).translate(unsafe)}.m4a"
            print("正在下载:",filename)
            with open(filename, 'wb') as f:
                f.write(video.content)
    print("下载完毕!")

def main():
    """Interactive entry point: list albums, ask for a choice, download it.

    Prints each album as ``<number> <name>``, reads the album number and the
    number of listing pages from standard input, then downloads every track
    found on those pages. An album number that is not in the list is reported
    and the function returns without downloading.

    Raises:
        ValueError: If either answer is not an integer.
    """
    url = 'https://www.ximalaya.com/xiangsheng/xiangsheng/mr132t2722/'
    result = get_xs(open_url(url))
    # Each menu entry is a (name, link) pair; only the name is shown.
    for index, (album, _link) in result.items():
        print(index, album)
    choice = int(input("请选择您要听的专辑序号:"))
    if choice not in result:
        print("无效的专辑序号:", choice)
        return
    nm, hf = result[choice]
    page = int(input("请选择您要爬取的页码:"))
    vdurl, nm = get_Videourl(nm, hf, page)
    get_Video(vdurl, nm)

# Run the interactive downloader only when executed as a script, not on import.
if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值