"""Download the tracks of every album on a Ximalaya FM ranking list.

The script requests the ranking list selected by ``page``, creates one folder
per album under ``base_path`` and saves every track found on the first
``PAGES_PER_ALBUM`` track pages of each album as an ``.m4a`` file.
"""
import json
import os
import re

import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36'
}
base_path = 'F:\\SxhMycode\\PYtest\\ead\\FM\\'  # root directory for downloads

# Number of 30-track pages fetched per album; raise it to download more.
PAGES_PER_ALBUM = 1
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Characters that cannot appear in a Windows file or directory name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def mkdir(path):
    """Create directory *path* (and missing parents) if it does not exist.

    Surrounding whitespace and trailing backslashes are removed from *path*
    before it is used. A confirmation line is printed only after the
    directory has actually been created.
    """
    path = path.strip().rstrip("\\")
    if os.path.exists(path):
        return
    os.makedirs(path, exist_ok=True)
    print(path + ' 创建成功')


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', name).strip().rstrip('.')
    return cleaned or '_'


def _fetch_json(url):
    """GET *url* with the shared headers and return the decoded JSON body.

    Raises ``requests.HTTPError`` on a non-2xx response instead of trying to
    parse an error page as JSON.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.content.decode())


def _download_track(track, folder):
    """Save one track dict (keys ``trackName`` and ``src``) into *folder*."""
    name = track['trackName']
    src = track.get('src')
    if not src:
        # Without a URL there is nothing to download; requests.get would
        # raise on an empty/None URL and abort the whole run.
        print(name + ' 无音频地址，已跳过')
        return
    # Send the same headers as the listing requests for consistency.
    response = requests.get(src, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    target = os.path.join(folder, _safe_filename(name) + '.m4a')
    # 'wb' rather than 'ab': appending on a re-run would concatenate a second
    # copy of the audio onto the existing file.
    with open(target, 'wb') as f:
        f.write(response.content)
    print(name)


page = 'youshengshu'  # ranking code to crawl
theme_url = 'https://www.ximalaya.com/revision/getRankList?code=' + page  # ranking list endpoint
start_url = 'https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&sort=-1&pageSize=30'  # per-album track page endpoint


def main():
    """Walk the ranking list and download the tracks of every album on it."""
    albums = _fetch_json(theme_url)['data']['albums']
    for album in albums:
        # Sanitise the title once so the created folder and the path used for
        # writing files are guaranteed to be the same.
        folder = base_path + _safe_filename(album['albumTitle'])
        mkdir(folder)
        for page_num in range(1, PAGES_PER_ALBUM + 1):
            listing = _fetch_json(start_url.format(album['id'], page_num))
            for track in listing['data']['tracksAudioPlay']:
                _download_track(track, folder)


if __name__ == '__main__':
    main()
# 爬取喜马拉雅FM分类榜的代码分析
# 最新推荐文章于 2024-01-21 16:59:24 发布