Python下载百度新歌100的代码

#!/usr/bin/python 
# -*- coding: utf-8 -*- 
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn> 
# License: GPLv2 
# Author: oneleaf <oneleaf AT gmail.com> 

import httplib 
import re 
import urllib 
import os 
import locale 

def getdownurl(url):
    """Fetch a result page from mp3.baidu.com and extract download URLs.

    url -- request path sent in a GET request to host mp3.baidu.com.

    Returns a list of URL strings found in the page. Each URL is decoded
    from GBK when possible; otherwise the raw byte string is kept.

    Any error raised by httplib while talking to the server propagates to
    the caller; the connection is closed in every case.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        response = conn.getresponse()
        html = response.read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    # Anchors pointing at the hard-coded redirect host; the capture is the
    # text between the host prefix and the closing </a>.
    anchors = re.findall('http://220.181.27.54/m(.*)</a>', html)

    urllist = []
    # Only every second match is inspected, exactly as the original index
    # loop did (step of 2).
    # NOTE(review): presumably each result is listed twice on the page;
    # confirm against the live markup.
    for anchor in anchors[::2]:
        title = re.search('title=(.*)onclick', anchor)
        if not title:
            continue
        link = re.search(r'http(\S*)', title.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeError:
            # Best effort: keep the undecoded string if it is not valid GBK.
            pass
        urllist.append(mp3url)
    return urllist

def downmp3(url,author,name,filelist): 
    filename=author+"-"+name; 
    for i in filelist: 
        name=unicode(i,locale.getpreferredencoding()) 
        if name.find(filename) == 0: 
            print u"文件已经下载,忽略。" 
            return 1 
    urllists=getdownurl(url) 
    for i in urllists:        
        print u"正在连接",i 
        
        ext=i[-4:] 
        try: 
            urlopen = urllib.URLopener() 
            fp=urlopen.open(i) 
            data = fp.read() 
            fp.close() 
            filename=filename+ext; 
            file=open(filename,'w+b') 
            file.write(data) 
            file.close() 
            print u"下载成功!" 
            return 1 
        except: 
            continue 
    return 0 

if __name__ == "__main__": 
    conn = httplib.HTTPConnection('list.mp3.baidu.com') 
    conn.request("GET",'/list/newhits.html?id=1') 
    response = conn.getresponse() 
    html=response.read().decode('gbk') 
    conn.close() 
    expression='<a href="http://mp3.baidu.com/m(.*)</a>' 
    listSentence = re.findall(expression, html) 
    lineno=0 
    while lineno<len(listSentence): 
       url=re.search('(.*)target',listSentence[lineno]) 
       url='/m'+url.group(0)[:-8] 
       name=re.search('blank>(.*)',listSentence[lineno]) 
       name=name.group(0)[6:] 
       author=re.search('blank>(.*)',listSentence[lineno+1]) 
       author=author.group(0)[6:] 
       print u"开始下载",author,name 
       filelist=os.listdir('.'); 
       if downmp3(url,author,name,filelist)==0: 
          print u"下载",author,name,u'失败!' 
       lineno+=2 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值