#-*- coding: UTF-8 -*-
import logging
import os
import re
import sys
import urllib

from lxml import etree
from pyquery import PyQuery as py

try:  # Python 3 moved urlopen into urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2
    urlopen = urllib.urlopen

# Text type of the running interpreter (unicode on Python 2, str on Python 3).
_text_type = type(u"")

# Characters that format() replaces with an underscore: space, the
# typographic apostrophe (U+2019) and the ASCII apostrophe.  They are text
# literals so that searching a text title never triggers an implicit ASCII
# decode of a non-ASCII byte string.
_REPLACED_CHARS = (u" ", u"\u2019", u"'")

# Pattern applied to the serialized ".mp3downloadbox" element.
# Group 1 is the song title, group 2 the mp3 link.
# NOTE(review): the markup-bearing parts of the original pattern were lost
# from the source; only the fragments ".*", "(.*)" and
# ".*downloadboxlist.*?" survive.  The "<h1>" / "<a href" anchors and the
# re.S flag below are an assumed reconstruction -- confirm against a live
# page before relying on it.
_MP3_BOX_RE = re.compile(
    u'.*<h1>(.*)</h1>.*downloadboxlist.*?<a href="(.*?)".*',
    re.S,
)


def format(filename):
    """Return *filename* with spaces and apostrophes replaced by "_".

    The function keeps its historical name even though it shadows the
    builtin ``format`` inside this module, because callers use that name.
    """
    for char in _REPLACED_CHARS:
        filename = filename.replace(char, u"_")
    return filename


def download_mp3(mp3_url, filename, dir, logger=None):
    """Download *mp3_url* into the file *filename* inside directory *dir*.

    Nothing is fetched when the target file already exists.  Failures are
    logged at DEBUG level and never propagate to the caller.

    Args:
        mp3_url: URL of the mp3 file.
        filename: Name of the file to create inside *dir*.
        dir: Destination directory.
        logger: Logger for progress messages; defaults to the "simple"
            logger.
    """
    if logger is None:
        logger = logging.getLogger("simple")
    target = os.path.join(dir, filename)
    if os.path.exists(target):
        logger.debug(target + "is existed.")
        return
    try:
        # Fetch the whole payload before creating the file so that a failed
        # download cannot leave an empty file behind; such a file would be
        # skipped as "existed" on the next run.
        response = urlopen(mp3_url)
        try:
            payload = response.read()
        finally:
            response.close()
        with open(target, "wb") as out:
            out.write(payload)
    except Exception:
        logger.debug(filename + "is not downloaded.")
    else:
        logger.debug(filename + "is downloaded.")


def download_all_mp3(start, end, dir, logger):
    """Download the mp3 of every song page with an id in [start, end).

    For each id the page "http://www.youban.com/mp3-d<id>.html" is loaded,
    the title and link are extracted from its ".mp3downloadbox" element and
    the file is saved in *dir* as "<id>_<title>.mp3" (after format()).  A
    page that cannot be processed is logged and skipped; it never stops
    the remaining ids from being processed.

    Args:
        start: First page id (inclusive).
        end: Last page id (exclusive).
        dir: Destination directory for the downloaded files.
        logger: Logger that receives DEBUG progress messages.
    """
    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        try:
            logger.debug(str(x) + ":" + url)
            doc = py(url=url)
            box = doc('.mp3downloadbox')
            if not box:
                # An empty selection means the page has no download box.
                logger.debug(url + "is not existed.")
                continue
            m = _MP3_BOX_RE.search(_text_type(box))
            if m is None:
                continue
            title = m.group(1).strip()
            link = m.group(2)
            if link == '' or title == '':
                logger.debug(url + "is not useful")
                continue
            title2 = format(str(x) + "_" + title + ".mp3")
            logger.debug(str(x) + ":" + link)
            download_mp3(link, title2, dir, logger)
        except Exception:
            # Deliberately best-effort: record the failure (with traceback)
            # and move on.  KeyboardInterrupt is not caught, so the run can
            # still be stopped with Ctrl-C.
            logger.debug(url + "met exception.", exc_info=True)
def parse_cli_args(argv):
    """Extract (dir_root, start, end) from a ``sys.argv``-style list.

    Expected layout: ``argv[1]`` = start id, ``argv[2]`` = end id,
    ``argv[3]`` = root directory.  Missing arguments fall back to the
    defaults ("e:\\song", 1, 8000) instead of raising IndexError.

    Args:
        argv: Argument list whose first item is the program name.

    Returns:
        Tuple ``(dir_root, start, end)``.

    Raises:
        ValueError: If a supplied start or end is not an integer.
    """
    dir_root = "e:\\song"
    start, end = 1, 8000
    if len(argv) > 3 and argv[3] != '':
        dir_root = argv[3]
    if len(argv) > 2:
        # Convert before comparing: comparing the raw strings with 0 is
        # always true on Python 2 and a TypeError on Python 3.
        first, last = int(argv[1]), int(argv[2])
        if first >= 0 and last >= 0:
            start, end = first, last
    return dir_root, start, end


if __name__ == "__main__":
    dir_root, start, end = parse_cli_args(sys.argv)
    print("Download from %s to %s.\n" % (start, end))

    # One sub-directory per id range, e.g. "<root>/1-8000".
    target_dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.exists(target_dir):
        # makedirs also creates the root directory when it is missing.
        os.makedirs(target_dir)
    print("Download to" + target_dir + ".\n")

    # Send every message both to the console and to download.log inside the
    # target directory.  The module-level name "logger" is kept because the
    # download functions may look it up as a global.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(message)s")
    fh = logging.FileHandler(os.path.join(target_dir, "download.log"))
    ch = logging.StreamHandler()
    for handler in (ch, fh):
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    download_all_mp3(start, end, target_dir, logger)