import os
import re

import requests
def main(album_id=4756811, page_count=38, out_dir="./爬取的数据", timeout=30):
    """Download the tracks of a Ximalaya album as ``.m4a`` files.

    For every page of the album's track list, each track's play-info
    endpoint is queried, the first ``"src"`` URL found in the response is
    fetched, and the audio bytes are written to ``<out_dir>/<title>.m4a``.

    Args:
        album_id: Album identifier placed in the ``albumId`` query parameter.
        page_count: Number of track-list pages to walk, starting at page 1.
        out_dir: Directory that receives the files; created if missing.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Raises:
        requests.HTTPError: If a track-list page request returns an error
            status.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36"
    }
    # Hoisted out of the loops: both patterns are loop-invariant.
    src_pattern = re.compile('"src":"(.*?)"')
    # Characters that cannot appear in a file name on Windows ("/" also
    # breaks the path on POSIX systems).
    unsafe_chars = re.compile(r'[\\/:*?"<>|]')

    os.makedirs(out_dir, exist_ok=True)

    for page in range(1, page_count + 1):
        list_url = (
            "https://www.ximalaya.com/revision/album/v1/getTracksList"
            "?albumId=%s&pageNum=%d&sort=0" % (album_id, page)
        )
        list_resp = requests.get(url=list_url, headers=headers, timeout=timeout)
        list_resp.raise_for_status()

        for track in list_resp.json()["data"]["tracks"]:
            track_id, title = track["trackId"], track["title"]
            info_url = (
                "https://www.ximalaya.com/revision/play/v1/audio?id=%s&ptype=1"
                % track_id
            )
            info_resp = requests.get(url=info_url, headers=headers, timeout=timeout)

            sources = src_pattern.findall(info_resp.text)
            if not sources:
                # No quoted "src" value in the response (presumably a
                # paid/VIP track): skip it instead of crashing the run.
                print("%s没有可用的音频地址,已跳过" % title)
                continue

            audio_resp = requests.get(url=sources[0], headers=headers, timeout=timeout)
            if not audio_resp.ok:
                # Do not save an HTTP error body under an .m4a name.
                print("%s下载失败(HTTP %s),已跳过" % (title, audio_resp.status_code))
                continue

            file_name = "%s.m4a" % unsafe_chars.sub("_", str(title))
            with open(os.path.join(out_dir, file_name), "wb") as audio_file:
                audio_file.write(audio_resp.content)
            print("%s爬取数据结束!!!" % title)
# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
爬虫案例——喜马拉雅(可下载普通音频和优选音频,VIP 音频除外)
最新推荐文章于 2024-07-01 17:20:06 发布
该代码示例展示了如何利用Python的requests库爬取喜马拉雅网站上的音频资源。通过循环遍历不同页面,获取专辑中每个音频的详细信息,并下载对应的m4a文件。整个过程涉及HTTP请求、JSON解析以及正则表达式匹配来抓取音频源URL。
摘要由CSDN通过智能技术生成