1. 运行 wget -i Baidump3URL.txt
2. 运行 BaiduMp3.py > baidump3.txt
3. 运行 del *.htm?
完成以上三步后，baidump3.txt 中即为 baidu 所有歌曲的名字列表


Baidump3URL.txt:


http://list.mp3.baidu.com/song/A.htm
http://list.mp3.baidu.com/song/B.htm
http://list.mp3.baidu.com/song/C.htm
http://list.mp3.baidu.com/song/D.htm
http://list.mp3.baidu.com/song/E.htm
http://list.mp3.baidu.com/song/F.htm
http://list.mp3.baidu.com/song/G.htm
http://list.mp3.baidu.com/song/H.htm
http://list.mp3.baidu.com/song/J.htm
http://list.mp3.baidu.com/song/K.htm
http://list.mp3.baidu.com/song/L.htm
http://list.mp3.baidu.com/song/M.htm
http://list.mp3.baidu.com/song/N.htm
http://list.mp3.baidu.com/song/O.htm
http://list.mp3.baidu.com/song/P.htm
http://list.mp3.baidu.com/song/Q.htm
http://list.mp3.baidu.com/song/R.htm
http://list.mp3.baidu.com/song/S.htm
http://list.mp3.baidu.com/song/T.htm
http://list.mp3.baidu.com/song/W.htm
http://list.mp3.baidu.com/song/X.htm
http://list.mp3.baidu.com/song/Y.htm
http://list.mp3.baidu.com/song/Z.htm


BaiduMp3.py:


#!/usr/bin/python

import urllib
import string
import re

def GetContent (url):
  try:
    URLFile=urllib.urlopen(url)
  except IOError:
    print "\nCan not retrieve ",url,"!\nThe connection cannot be made!\n"
  else:
    HTMLText=URLFile.read()
    URLFile.close()
    return HTMLText


# Prefix shared by every listing URL; wget saves each page under its last
# path component in the current directory.
LISTING_PREFIX = 'http://list.mp3.baidu.com/song'

# Text of a link that opens in a new window and closes its table cell.
LINK_TEXT = re.compile(r'" target=_blank>(.*?)</[aA]></td>')

# Everything after the first '-' or ' ' in an entry.
AFTER_SEPARATOR = re.compile(u'[- ](.*)')

# U+300A / U+300B (double angle brackets), ',', '.', U+00B7 (middle dot), '!'.
# Written with escapes and applied to decoded text so the match does not
# depend on the encoding this source file happens to be saved in.
PUNCTUATION = re.compile(u'[\u300a\u300b,.\xb7!]')


def ParseUrlList(text):
  """Return the non-blank lines of *text*, stripped of surrounding whitespace."""
  stripped = (line.strip() for line in text.splitlines())
  return [line for line in stripped if line]


def LocalPagePath(url):
  """Map a listing URL to the relative path of its downloaded copy."""
  return url.replace(LISTING_PREFIX, '.')


def ReadPages(urls):
  """Yield the content of the downloaded page for each URL in *urls*."""
  for url in urls:
    with open(LocalPagePath(url), 'r') as page:
      yield page.read()


def CollectLinkTexts(pages):
  """Return the distinct link texts found in *pages*, in first-seen order.

  *pages* is an iterable of page contents. A set tracks what has already
  been seen so de-duplication stays linear in the number of entries.
  """
  seen = set()
  ordered = []
  for content in pages:
    for line in content.split('\n'):
      # NOTE(review): only the first link on a line is captured, as before;
      # confirm the listing pages put one entry per line.
      match = LINK_TEXT.search(line)
      if match is None:
        continue
      text = match.group(1)
      if text not in seen:
        seen.add(text)
        ordered.append(text)
  return ordered


def CleanTitle(raw):
  """Convert one cp936-encoded entry into a cleaned UTF-8 byte string.

  The part after the first '-' or ' ' is kept when such a separator exists,
  then the punctuation in PUNCTUATION is removed. The result may be empty.
  Bytes that are not valid cp936 are replaced rather than aborting the run.
  """
  text = raw.decode('cp936', 'replace')
  match = AFTER_SEPARATOR.search(text)
  if match:
    text = match.group(1)
  return PUNCTUATION.sub(u'', text).encode('utf8')


if __name__ == "__main__":
  with open('Baidump3URL.txt', 'r') as url_list:
    # Blank lines (e.g. a trailing newline) must not be turned into paths.
    urls = ParseUrlList(url_list.read())

  for raw_title in CollectLinkTexts(ReadPages(urls)):
    title = CleanTitle(raw_title)
    if title:
      print(title)