"""Download the songs of a NetEase Cloud Music (music.163.com) playlist as MP3 files."""

# NOTE: time, sys, re, urllib.request and BeautifulSoup are not used below; they
# are kept because the original file imported them.
import os
import re
import sys
import time
import urllib.request

import requests
from bs4 import BeautifulSoup
from scrapy.selector import Selector


class wangyiyun():
    """Scrape a playlist page and download each track through the public "outer" URL.

    Attributes:
        headers: HTTP headers (User-Agent and Referer) sent with every request.
        main_url: Base URL of the web player.
        session: Shared ``requests.Session`` used for all HTTP calls.
        song_list: Maps the running song number to the path of every file
            written by ``download_song``.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    # Files smaller than this many KiB are treated as failed downloads.
    MIN_SIZE_KB = 1024

    # Characters that are not allowed in Windows file names.
    _ILLEGAL_FILENAME_CHARS = str.maketrans('', '', '\\/:*?"<>|')

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
            'Referer': 'http://music.163.com/',
        }
        self.main_url = 'http://music.163.com/'
        self.session = requests.Session()
        # update() keeps the session's header container instead of replacing it.
        self.session.headers.update(self.headers)
        self.song_list = {}

    def get_songurls(self, playlist, long):
        """Return the relative URLs of the first ``long`` songs of a playlist.

        Args:
            playlist: Numeric playlist id (the ``id`` query parameter of the
                playlist page).
            long: Maximum number of songs to return.

        Returns:
            A list of relative URLs such as ``'/song?id=64006'``.
        """
        url = self.main_url + 'playlist?id=%d' % playlist
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        selector = Selector(text=response.text)
        # NOTE(review): the XPath in the pasted source was destroyed by an e-mail
        # obfuscator; this selects the hrefs of the links inside the hidden
        # "f-hide" list, which matches the documented output. Confirm against
        # the live page.
        songurls = selector.xpath('//ul[@class="f-hide"]//a/@href').extract()
        return songurls[:long]

    def get_songinfo(self, songurl):
        """Fetch a song page and return its id and a file-system-safe name.

        Args:
            songurl: Relative song URL as returned by ``get_songurls``,
                e.g. ``'/song?id=64006'``.

        Returns:
            A tuple ``(song_id, songname)``, e.g. ``('64006', '陈小春-失恋王')``,
            where ``songname`` is ``"<singers>-<title>"`` with characters that
            are illegal in file names removed.

        Raises:
            ValueError: If the song title cannot be found on the page.
        """
        # main_url already ends with '/', so drop the leading one of songurl.
        url = self.main_url + songurl.lstrip('/')
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        selector = Selector(text=response.text)
        song_id = url.split('=')[1]
        song_name = selector.xpath("//em[@class='f-ff2']/text()").extract_first()
        if song_name is None:
            raise ValueError('no song title found at ' + url)
        singer = '&'.join(
            selector.xpath("//p[@class='des s-fc4']/span/a/text()").extract()
        )
        songname = (singer + '-' + song_name).translate(self._ILLEGAL_FILENAME_CHARS)
        return str(song_id), songname

    def download_song(self, i, songurl, dir_path):
        """Download one song as an MP3 file into ``dir_path``.

        A file that already exists is skipped. Newly written files are recorded
        in ``self.song_list`` under key ``i``. Any failure is reported on stdout
        and swallowed so that a batch download continues with the next song.

        Args:
            i: Running number of the song, used for output and as the
                ``song_list`` key.
            songurl: Relative song URL, e.g. ``'/song?id=64006'``.
            dir_path: Directory the MP3 file is written to.
        """
        # Fallback label so the error message works even if the lookup fails.
        label = songurl
        try:
            song_id, songname = self.get_songinfo(songurl)
            label = songname
            song_url = 'http://music.163.com/song/media/outer/url?id=%s.mp3' % song_id
            path = os.path.join(dir_path, songname + '.mp3')
            if os.path.exists(path):
                print(str(i), songname + 'exists!', sep='.')
                return
            song = self.session.get(song_url, timeout=self.REQUEST_TIMEOUT)
            # Do not write an HTTP error page to disk as if it were audio.
            song.raise_for_status()
            with open(path, 'wb') as f:
                print(str(i), songname + '.mp3', sep='.')
                f.write(song.content)
            self.song_list[i] = path
        except Exception as error_info:
            # Per-song boundary: report the problem and keep the batch running.
            print('Error! =======' + label, error_info)

    def fileSize(self):
        """Delete downloaded files that are too small to be a real song.

        Every file recorded in ``self.song_list`` that is smaller than
        ``MIN_SIZE_KB`` KiB is removed from disk. Entries whose file was
        deleted, or is no longer on disk, are dropped from ``self.song_list``.
        """
        # Iterate over a snapshot because entries are removed inside the loop.
        for num, file_path in list(self.song_list.items()):
            try:
                size_kb = os.path.getsize(file_path) / 1024
            except OSError:
                # The file vanished in the meantime; nothing left to check.
                del self.song_list[num]
                continue
            if size_kb < self.MIN_SIZE_KB:
                os.unlink(file_path)
                del self.song_list[num]
                print(
                    str(num),
                    os.path.basename(file_path) + 'Small than 1M. Has been deleted.',
                    sep='.',
                )

    def work(self, playlist, long, path):
        """Download the first ``long`` songs of ``playlist`` into ``path``.

        Args:
            playlist: Numeric playlist id.
            long: Maximum number of songs to download.
            path: Target directory; it is created if it does not exist.
        """
        songurls = self.get_songurls(playlist, long)
        os.makedirs(path, exist_ok=True)
        for i, songurl in enumerate(songurls, start=1):
            self.download_song(i, songurl, path)
        self.fileSize()


if __name__ == '__main__':
    long = int(input("How many songs do you want to download?\n"))
    path = r'D:\备份\新建文件夹'
    d = wangyiyun()
    # 400931460 is the id parameter in the URL of the playlist's web page.
    d.work(400931460, long, path)