# Xiami scraper: fetch song information from xiami.com

#-*- coding: utf-8 -*-

"""Created on Fri Aug 01 18:09:21 2014

@author: omom"""

import json
import time
import urllib
import urllib2
from pprint import pprint

from bs4 import BeautifulSoup

# Widget URL of the playlist to scrape.  Its second "_"-separated field is the
# comma-separated list of song ids (with a trailing comma).
src = (
    "http://www.xiami.com/widget/40537093_"
    "376239,185689,1769863609,3351090,1769863610,3562321,"
    "183942,1769736296,3418502,1770127750,1770201852,3351083,"
    "3351088,3351082,1769496545,1769496547,1769496546,3418497,"
    "_235_346_FF8719_494949_0/multiPlayer.swf"
)

# Alternative playlists, kept for reference:
# src="http://www.xiami.com/widget/40537093_1771331004,1771331002,55553,3478385,1769187978,380807,3478389,1770464110,55552,1771331001,380865,3478386,380834,380869,55670,55823,1772165872,55549,1769187987,380818,_235_346_FF8719_494949_0/multiPlayer.swf"
# src="http://www.xiami.com/widget/40537093_380863,380832,55550,380830,380837,380861,380799,380866,380808,1770464109,55559,380860,1771512727,3478391,1771331023,55711,55556,380797,1769074612,380788,380810,3478387,380852,55705,55865,1769187981,380787,380862,1770464107,1771360882,55700,1770464108,55869,55867,3478388,380835,1769187983,3364419,1769115993,1771331005,_235_346_FF8719_494949_0/multiPlayer.swf"

# Playlist endpoint; "%s" is filled with a single song id.
music_base = "http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id"

# a: everything before the first "_", b: the id list, c: the remainder.
a, b, c = src.split("_", 2)
b = b.rstrip(",")  # drop the trailing comma so no empty id is produced
ids = b.split(",")

try:  # Python 2: urllib2.unquote is only a re-export of urllib.unquote
    from urllib import unquote as _unquote
except ImportError:  # Python 3 keeps it in urllib.parse
    from urllib.parse import unquote as _unquote

# How many times collect() calls urlopen() before giving up on a song.
_MAX_ATTEMPTS = 5

# Headers sent with every playlist request.
# NOTE(review): the cookie below is a captured browser session (it carries
# values such as member_auth and _xiamitoken).  It is hard-coded, so it will
# go stale, and credentials like this should not live in source control.
_REQUEST_HEADERS = (
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"),
    ("Accept-Language", "zh-CN,zh;q=0.8"),
    ("Cache-Control", "no-cache"),
    ("Connection", "close"),
    ("Pragma", "no-cache"),
    ("Referer", "http://www.baidu.com/"),
    ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"),
    ("cookie", "_unsign_token=d0e87c7230b44e116e3b8e96c48c9b62; __gads=ID=3e9c72b9e0b3e7ba:T=1407824092:S=ALNI_MYxedT1iMAiA-IXbcgEu4Ss_XiRaw; box_opened=1; bdshare_firstime=1409207591670; __utma=251084815.350459004.1409209135.1409209135.1409209135.1; __utmz=251084815.1409209135.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); member_auth=2WydGIYavmhn16fDTt9ldyYb5%2BHTT2eFyY9Yjb4ovwQnIooIY9H%2Bx6uVQg5L3yCaq2HKtwNJXYSZg3aFgGLx8Kg; user=40534293%22%E5%93%8E%E5%B0%8F%E7%AC%A8%E8%9B%8Ba%22%220%221%22%3Ca+href%3D%27%2Fwebsitehelp%23help9_3%27+%3Edo%3C%2Fa%3E%220%220%220%22ee71485989%221409217771; ahtena_is_show=false; recent_tags=%E6%BF%80%E6%83%85+%E7%97%9B%E8%8B%A6+%E4%BC%A4%E5%BF%83+%E5%BF%A7%E4%BC%A4+; user_from=1; t_sign_auth=0; __guestplay=MTc3MjEzMDMyMywxOzE3NzE0MTkwNTQsMjsxNzY5OTI0MjQ0LDE%3D; pnm_cku822=187n%2BqZ9mgNqgJnCG0WtMC8x7vAtsC0zrXQcNA%3D%7CnOiH84T3i%2FOL%2F4zwi%2FyG9VU%3D%7CneiHGXz6UeRW5k4rRCFXLkskQdt3xmHTad%2B6Gro%3D%7Cmu6b9JHlkuGd5Z3pmuad6pDjnu2c65%2Fkneef5JjhluyX7JjhmuCFJQ%3D%3D%7Cm%2B%2BT%2FGIXeAx4D2AUbwBl1mcbhfZW1n3Fv8F03GvTZte00XHR%7CmO6BH2wDdg11Gm4bbht0B2gcYBVmCX0OdQZpHWEUZwh8D3gDowM%3D%7Cme6d7oHyneiH84Twn%2BmR64TzUw%3D%3D; CNZZDATA921624=cnzz_eid%3D6125411959-1407824089-%26ntime%3D1409550640; CNZZDATA2629111=cnzz_eid%3D1781743730-1407814089-%26ntime%3D1409450640; _xiamitoken=7cec7fe673a3672812c4b714a31d6687; isg=67257CF91c74F3297A603C00A816D262; sec=5401410089735bee8e0075e0b6825e0ba6a0a485"),
)

# (result key, XML tag) pairs whose text is copied verbatim into the result.
_SONG_FIELDS = (
    ("title", "title"),
    ("song_id", "song_id"),
    ("lyric", "lyric"),
    ("background", "background"),
    ("album_id", "album_id"),
    ("album_pic_s", "pic"),
    ("album_pic", "album_pic"),
    ("album_name", "album_name"),
    ("artist_id", "artist_id"),
    ("artist", "artist"),
)


def decrypt_url(s):
    """Decode the scrambled ``location`` string of a playlist entry.

    The first character of ``s`` is a row count.  The remaining characters
    are cut into that many consecutive rows, the leading rows being one
    character longer when the length does not divide evenly.  Reading the
    rows column by column gives a percent-encoded string, which is unquoted;
    every ``^`` (both before and after unquoting) is turned into ``0``.

    Returns an empty string for empty input.

    Raises:
        ValueError: if the first character is not a digit in 1..9.
    """
    s = s.replace('^', '0')
    if not s:
        return ''

    try:
        rows_count = int(s[0])
    except ValueError:
        raise ValueError("location must start with a digit row count: %r" % s[:16])
    if rows_count < 1:
        raise ValueError("row count must be at least 1: %r" % s[:16])

    cipher = s[1:]
    row_len, remainder = divmod(len(cipher), rows_count)

    # Slice the text into rows; the first `remainder` rows get one extra char.
    rows = []
    start = 0
    for index in range(rows_count):
        stop = start + row_len + (1 if index < remainder else 0)
        rows.append(cipher[start:stop])
        start = stop

    # Read column-major.  Indexing replaces the original list.pop(0), which
    # shifted the whole row on every character.
    column_count = row_len + (1 if remainder else 0)
    chars = []
    for column in range(column_count):
        for row in rows:
            if column < len(row):
                chars.append(row[column])

    return _unquote(''.join(chars)).replace('^', '0')


def collect(mid=376239, high_quality=False):
    """Fetch the playlist document for song ``mid`` and return its metadata.

    Args:
        mid: song id substituted into ``music_base``.
        high_quality: when true, the decoded URL loses its ``?auth_key...``
            part and every ``_l`` in it becomes ``_h``.

    Returns:
        A dict with the keys ``title``, ``song_id``, ``url``, ``lyric``,
        ``background``, ``album_id``, ``album_pic_s``, ``album_pic``,
        ``album_name``, ``artist_id``, ``artist`` and ``xiami`` (always
        ``True``).  An empty dict is returned, after printing the id, when
        every request attempt raised ``HTTPError`` or when the document is
        missing an expected tag / carries an undecodable location.
    """
    req = urllib2.Request(music_base % str(mid))
    for header_name, header_value in _REQUEST_HEADERS:
        req.add_header(header_name, header_value)

    for _attempt in range(_MAX_ATTEMPTS):
        try:
            page = urllib2.urlopen(req)
        except urllib2.HTTPError:
            continue
        break
    else:
        # Every attempt failed.
        print('id is: %s' % mid)
        return {}

    try:
        time.sleep(0.5)  # pause after each successful request, as before
        dom = BeautifulSoup(page.read(), features="xml")
    finally:
        page.close()

    try:
        info = {}
        for key, tag in _SONG_FIELDS:
            # dom.find() returns None for a missing tag -> AttributeError
            info[key] = dom.find(tag).text
        url = decrypt_url(dom.find("location").text)
    except (AttributeError, ValueError):
        # The original printed the builtin `id` here and then carried on with
        # unbound locals; report the real song id and bail out instead.
        print('id is: %s' % mid)
        return {}

    print(info["title"])

    if high_quality:
        # Reverse / replace "l_" / reverse back rewrites each "_l" as "_h".
        # presumably this selects the high-bitrate file variant - TODO confirm
        url = url.split("?auth_key")[0][::-1].replace("l_", "h_")[::-1]

    info["url"] = url
    info["xiami"] = True
    return info

# Quick manual check:
# from pprint import pprint
# pprint(collect())

def split_var(s):
    """Print code snippets generated from a block of ``name=value`` lines.

    For every non-blank line of ``s`` the text before the first ``=`` is taken
    as a variable name.  Two snippets are printed, separated by blank lines:

    * a dict literal ``{"name":name,...}`` (with a trailing comma), and
    * one ``name=i["name"]`` line per variable.

    Nothing is returned.
    """
    names = []
    for raw_line in s.split("\n"):
        line = raw_line.strip()
        if line:
            names.append(line.split("=")[0])

    dict_literal = "{" + "".join('"%s":%s,' % (name, name) for name in names) + "}"
    unpack_lines = "".join('%s=i["%s"]\n' % (name, name) for name in names)

    # print('') rather than print(): on Python 2 the latter would print "()".
    print('')
    print(dict_literal)
    print('')
    print(unpack_lines)
    print('')
    print('')


# Sample input for split_var(): the assignments used inside collect().
s = '''title=dom.find("title").text
song_id=dom.find("song_id").text
url=decrypt_url(url)
lyric=dom.find("lyric").text
background=dom.find("background").text
album_id=dom.find("album_id").text
album_pic_s=dom.find("pic").text
album_pic=dom.find("album_pic").text
album_name=dom.find("album_name").text
artist_id=dom.find("artist_id").text
artist=dom.find("artist").text'''

# Original author's note (translated): "No longer valid; must be fetched in
# real time."  NOTE(review): it is not clear from here what the note refers to.

# Print the code-generation snippets for the sample assignments.
split_var(s)

# Collect metadata for every song id of the playlist; collect() returns an
# empty dict for songs it could not fetch or parse, and those are skipped.
dst = []
for i in ids:
    data = collect(i)
    if data:
        dst.append(data)

# Re-package the results as a form payload: {"data": <JSON list of songs>}.
data = json.dumps(dst)
dst = {"data": data}

# Upload step, disabled in the original:
# print urllib.urlopen("http://localhost/music/upload", data=urllib.urlencode(dst)).read()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值