用python2.7爬取网易云音乐
想根据歌手来爬取，但是在网易云官网上，有一块歌手列表的链接一直没找到它的 URL 写在哪里；截取的包也看过了，还是没找到，可能是学艺不精吧。
所以我就选择了一个笨办法。
#-*-encoding:utf-8-*-
import sys
import urllib2
import bs4
import requests
import json
import urllib
# Python 2 hack: site.py deletes sys.setdefaultencoding at startup, so the
# module must be reloaded to get it back. Forcing the default codec to UTF-8
# lets implicit str<->unicode conversions of the Chinese page content succeed
# without explicit .encode()/.decode() calls everywhere.
reload(sys)
sys.setdefaultencoding('utf-8')
def url_demo(start=4000, stop=4010):
    """Build the list of NetEase artist-album page URLs to crawl.

    The artist ids are simply enumerated (the author could not find the
    artist-list URL on the site, hence this brute-force approach).

    start -- first artist id, inclusive (default 4000, as in the original)
    stop  -- last artist id, exclusive (default 4010)

    Returns a list of 'http://music.163.com/artist/album?id=<n>' strings.
    """
    base = 'http://music.163.com/artist/album?id='
    # Comprehension replaces the original manual append loop; defaults keep
    # the original hard-coded range(4000, 4010) behavior.
    return [base + str(n) for n in range(start, stop)]
def find_gequ():
    """Collect album-page URLs from every artist listing page.

    Fetches each URL from url_demo(), parses the album links out of the
    'div.g-wrap6 > ul > li > p > a' anchors, and returns a list of lists:
    one inner list of absolute album URLs per artist page that yielded at
    least one link. Pages that fail to download or parse are skipped.
    """
    songs_url = []
    for url in url_demo():
        # Reset per page; the original initialized this inside the try AFTER
        # the network call, so a failed fetch either raised NameError (first
        # iteration) or re-appended the previous page's stale list.
        text_urls = []
        try:
            html = get_html(url)
            soup = bs4.BeautifulSoup(html, 'html.parser')
            for anchor in soup.select('div.g-wrap6 > ul > li > p > a'):
                text_urls.append('http://music.163.com' + anchor['href'])
        except Exception:
            # Best-effort crawl: skip broken pages. The original did
            # urls.remove(url) here, mutating the list while iterating it,
            # which silently skips the element after every failure.
            continue
        if text_urls:
            songs_url.append(text_urls)
    return songs_url
def get_gequ():
    """Fetch every album page and dump its track list to an XML file.

    For each artist's list of album URLs (from find_gequ), downloads each
    album page, reads the JSON track data the page embeds in its first
    <textarea>, accumulates [song_name, artist_name] pairs, and rewrites
    the artist's XML file after every album (cumulative, as the original
    script did).
    """
    for album_urls in find_gequ():
        songs = []
        for url in album_urls:
            html = get_html(url)
            soup = bs4.BeautifulSoup(html, 'html.parser')
            # The album page stores its track list as a JSON array inside
            # the first <textarea> element.
            content = json.loads(soup.select('textarea')[0].get_text())
            # Guard added: an album with no tracks would make the
            # content[0] lookup below raise IndexError.
            if not content:
                continue
            for track in content:
                songs.append([track['name'], track['artists'][0]['name']])
            print(album_urls)
            print('正在写入' + url + '-' * 20)
            # File is named after the first track's artist.
            write_xml(content[0]['artists'][0]['name'], album_urls, songs)
def write_xml(name, url, content, out_dir='E:\\demo\\'):
    """Write song data to <out_dir><name>.xml in a crude XML format.

    name    -- artist name, used as the file stem
    url     -- iterable of album URL strings, joined into one <url> element
    content -- list of [song_name, artist_name] pairs
    out_dir -- destination directory incl. trailing separator; parameterized
               (default keeps the original hard-coded 'E:\\demo\\')

    NOTE(review): values are not XML-escaped, so names containing '<' or
    '&' produce malformed XML — kept as in the original.
    """
    path = out_dir + name + '.xml'
    # 'with' guarantees the handle is closed; the original leaked it.
    # Text mode instead of 'wb' so the str writes also work on Python 3.
    with open(path, 'w') as f:
        f.write('<doc>\n')
        f.write('\t<url>')
        for u in url:
            f.write(u + ' , ')
        f.write('\t</url>\n')
        for pair in content:
            f.write('\t\t<song_name>%s</song_name>\n' % pair[0])
            f.write('\t\t<songer_name>%s</songer_name>\n' % pair[1])
        f.write('</doc>\n')
def get_html(url):
    """Download *url* and return the response body decoded as UTF-8 text."""
    response = urllib2.urlopen(url)
    return response.read().decode('utf-8')
# Script entry point: crawl the hard-coded artist-id range and dump the
# results as XML files when run directly (not on import).
if __name__ == '__main__':
    get_gequ()
当然，我只是要拿到歌单而已，不是歌曲本身。