# Download the songs listed on a 全民K歌 (kg.qq.com) personal home page.
import urllib.request
import requests
import os
import re
# Directory the downloaded songs are written to.
path = "C:\\Users\\HUAWEI\\Desktop\\spider\\kg"
# Personal home page whose song links are scraped.
url = "https://kg.qq.com/node/personal?uid=6a9d9a81222830833c"
# NOTE(review): the header value itself starts with a literal "User-Agent:"
# prefix; it is kept unchanged here -- confirm whether that is intended.
headers = {'User-Agent':'User-Agent:Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'}


def _fetch_html(target):
    """Return the body of *target* decoded as UTF-8, sending ``headers``."""
    request = urllib.request.Request(target, headers=headers)
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")


html = _fetch_html(url)

# Each match is a (link target, link text) pair; the link text is later used
# as the file name of the song.
pat = r'<a href="(.*?)" .*? target="_blank">(.*?)</a>'
dlist = re.findall(pat, html)
# Media URL embedded in a song page. The pattern ends with the closing quote,
# so every match carries a trailing '"' that must be removed before use.
pat_music = r'http://[a-z][a-z].stream.kg.qq.com.*.m4a.*?"'

# Make sure the target directory exists so that the listing below and the
# later open() calls do not fail on a fresh machine.
os.makedirs(path, exist_ok=True)

# Print the directory tree (as before) and remember the files that sit
# directly in ``path``. os.walk yields the top directory first; relying on the
# loop variable after the loop would instead give the listing of the last
# sub-directory walked.
existing_files = set()
for depth, (root, dirs, files) in enumerate(os.walk(path)):
    print('root:',root)
    print('dirs',dirs)
    print('files',files)
    if depth == 0:
        existing_files.update(files)

for song_page, title in dlist:
    mus = re.findall(pat_music, _fetch_html(song_page))
    if not mus:
        # The page contains no media URL; nothing to download.
        continue

    mus_name = title + ".mp3"
    if mus_name in existing_files:
        # A file with this name is already present: overwrite only on an
        # explicit 'y', otherwise ask for a replacement file name.
        yesorno = input("是否需要覆盖:")
        if yesorno != 'y':
            mus_name = input("重命名:")

    # os.path.join copes with ``path`` with or without a trailing separator.
    path_url = os.path.join(path, mus_name)

    # Download only after the destination is settled; strip the quote that
    # the pattern captured at the end of the URL.
    MP3 = requests.get(mus[0].rstrip('"'))
    with open(path_url, "wb") as f:
        f.write(MP3.content)

    # Track the new file so a later song with the same title triggers the
    # overwrite prompt instead of silently replacing this one.
    existing_files.add(mus_name)