#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>
import httplib
import re
import urllib
import os
import locale
def getdownurl(url):
    """Fetch a result page from mp3.baidu.com and return candidate MP3 URLs.

    Sends ``GET url`` to ``mp3.baidu.com`` and scans the response body for
    anchors of the form ``http://220.181.27.54/m...</a>``.  For every second
    match, the first ``http...`` token inside its ``title=...onclick`` span
    is taken as a download URL.

    Args:
        url: Request path (including any query string) on ``mp3.baidu.com``.

    Returns:
        A list of URLs in page order.  Each URL is decoded from GBK when
        possible; a URL that cannot be decoded is kept exactly as received.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    urllist = []
    anchors = re.findall('http://220.181.27.54/m(.*)</a>', html)
    # Only every second match is inspected -- presumably each result row
    # contributes two anchors; TODO confirm against the live markup.
    for anchor in anchors[::2]:
        title_span = re.search('title=(.*)onclick', anchor)
        if not title_span:
            continue
        link = re.search(r'http(\S*)', title_span.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeError:
            # Not valid GBK: keep the raw value rather than dropping the URL.
            pass
        urllist.append(mp3url)
    return urllist
def downmp3(url, author, name, filelist):
    """Download one song as ``<author>-<name><ext>`` in the current directory.

    Args:
        url: Result-page path passed to ``getdownurl`` to obtain the list of
            candidate download links.
        author: Artist name; first part of the output file name.
        name: Song title; second part of the output file name.
        filelist: Names of files that already exist.  Byte-string entries are
            decoded with the locale's preferred encoding before comparison;
            text entries are compared as they are.

    Returns:
        1 if an entry of ``filelist`` already starts with ``<author>-<name>``
        or if one of the candidate links was downloaded and saved;
        0 if every candidate link failed.
    """
    text_type = type(u"")
    basename = author + "-" + name
    encoding = locale.getpreferredencoding()

    for entry in filelist:
        if not isinstance(entry, text_type):
            try:
                entry = entry.decode(encoding)
            except UnicodeError:
                # Undecodable names cannot be matched; skip them instead of
                # aborting the whole run.
                continue
        if entry.startswith(basename):
            print(u"文件已经下载,忽略。")
            return 1

    for link in getdownurl(url):
        if isinstance(link, text_type):
            shown = link
        else:
            # Raw byte-string URL: decode leniently, for display only.
            shown = link.decode('gbk', 'replace')
        print(u"正在连接" + u" " + shown)
        try:
            # Build the target name in a fresh variable so a failed attempt
            # does not leave its extension glued onto the base name that the
            # next candidate link will use.
            target = basename + link[-4:]
            fp = urllib.URLopener().open(link)
            try:
                data = fp.read()
            finally:
                fp.close()
            out = open(target, 'w+b')
            try:
                out.write(data)
            finally:
                out.close()
        except Exception:
            # Best effort: a failure on this link just moves on to the next.
            # Unlike a bare ``except``, this does not trap KeyboardInterrupt
            # or SystemExit (Python 2.5+).
            continue
        print(u"下载成功!")
        return 1
    return 0
def main():
    """Download every song listed on Baidu's new-hits chart page.

    Fetches ``/list/newhits.html?id=1`` from ``list.mp3.baidu.com``, decodes
    it as GBK, and walks the ``<a href="http://mp3.baidu.com/m...">`` anchors
    two at a time: the first anchor of a pair supplies the search path and
    the song title, the second supplies the artist.  Each song is handed to
    ``downmp3``; a failure is reported and the loop moves on.
    """
    conn = httplib.HTTPConnection('list.mp3.baidu.com')
    try:
        conn.request("GET", '/list/newhits.html?id=1')
        html = conn.getresponse().read().decode('gbk')
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    anchors = re.findall('<a href="http://mp3.baidu.com/m(.*)</a>', html)
    # Stop before a trailing unpaired anchor so anchors[idx + 1] always exists.
    for idx in range(0, len(anchors) - 1, 2):
        song_anchor = anchors[idx]
        artist_anchor = anchors[idx + 1]
        link = re.search('(.*)target', song_anchor)
        title = re.search('blank>(.*)', song_anchor)
        performer = re.search('blank>(.*)', artist_anchor)
        if not (link and title and performer):
            # Markup did not have the expected shape; skip this pair
            # instead of crashing on a None match.
            continue
        # [:-8] drops the word "target" plus the two characters before it
        # (presumably the closing quote and a space -- verify against markup).
        url = '/m' + link.group(0)[:-8]
        # [6:] drops the leading "blank>" of the match.
        name = title.group(0)[6:]
        author = performer.group(0)[6:]
        print(u" ".join([u"开始下载", author, name]))
        filelist = os.listdir('.')
        if downmp3(url, author, name, filelist) == 0:
            print(u" ".join([u"下载", author, name, u'失败!']))


if __name__ == "__main__":
    main()
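# Python code for downloading Baidu's "Top 100 New Songs" chart.
# (Web-page residue: "Latest recommended article published on 2024-09-09 00:00:00".)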