python有声读书_Python爬取喜马拉雅有声书

# 导入第三方库

import requests

import parsel

# 模拟浏览器

# Request headers sent with every HTTP call so the client presents a
# desktop Chrome user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}

def download_media(media_url, media_name):
    """Download one media file and save it as ``<media_name>.mp4``.

    Args:
        media_url: URL of the media resource to fetch.
        media_name: File name stem; ``.mp4`` is appended and the path is
            resolved relative to the current working directory.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Stream the body so the whole file is never held in memory at once; the
    # timeout keeps one stalled connection from hanging the entire crawl.
    with requests.get(media_url, headers=headers, stream=True, timeout=30) as response:
        # Fail before opening the file so an error page is never saved as media.
        response.raise_for_status()
        with open(f'{media_name}.mp4', mode="wb") as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)

def media_api(track_id):
    """Look up the audio source URL of a track through the site's JSON API.

    Args:
        track_id: Track identifier, interpolated into the ``id`` query
            parameter of the API URL.

    Returns:
        The value stored at ``data["data"]["src"]`` in the JSON response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        KeyError: If the response JSON lacks the ``data``/``src`` keys.
    """
    api_url = f"https://www.ximalaya.com/revision/play/v1/audio?id={track_id}&ptype=1"
    response = requests.get(api_url, headers=headers, timeout=10)
    # Surface HTTP errors here instead of failing later while decoding JSON.
    response.raise_for_status()
    return response.json()["data"]["src"]

def get_total_page(page_url):
    """Yield ``(track_id, title)`` pairs for the sound links on one listing page.

    This is a generator: the page is only requested when iteration starts.

    Args:
        page_url: URL of the album listing page to scrape.

    Yields:
        Tuples of (last path segment of the link's ``href``, the link's
        ``title`` attribute). The title is ``None`` when the anchor has no
        ``title`` attribute.

    Raises:
        requests.HTTPError: If the page responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(page_url, headers=headers, timeout=10)
    response.raise_for_status()

    sel = parsel.Selector(response.text)
    # Only the first 30 matching anchors are considered per page.
    for sound in sel.css(".sound-list ul li a")[:30]:
        href = sound.css("a::attr(href)").extract_first()
        if not href:
            # An anchor without an href has no track id to extract.
            continue
        # The track id is the final path segment; strip a trailing slash so
        # it does not produce an empty id.
        media_id = href.rstrip("/").split("/")[-1]
        media_name = sound.css("a::attr(title)").extract_first()
        yield media_id, media_name

if __name__ == '__main__':
    # Entry point: walk listing pages 1..23 of the album and download every
    # track found on each page.
    for page in range(1, 24):
        medias = get_total_page(f"https://www.ximalaya.com/youshengshu/20642967/p{page}")
        for media_id, media_name in medias:
            media_url = media_api(media_id)
            if not media_url:
                # No source URL came back for this track; skip it rather than
                # hand an empty URL to the downloader.
                continue
            download_media(media_url, media_name)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值