# coding: utf-8
import httplib
import urllib
import sys
def dlProgress(count, blockSize, totalSize):
    """Write a one-line download progress indicator to stdout.

    The signature matches the ``reporthook`` callback of ``urlretrieve``.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block in bytes.
        totalSize: Total size in bytes; zero or negative when unknown.
    """
    if totalSize > 0:
        # The last block is usually partial, so count * blockSize can
        # overshoot totalSize; clamp to keep the display at 100%.
        percent = min(100, count * blockSize * 100 // totalSize)
        sys.stdout.write('finished ...... %3d%%\r' % percent)
    else:
        # No usable total: a percentage cannot be computed, show bytes instead.
        sys.stdout.write('finished ...... %d bytes\r' % (count * blockSize))
    sys.stdout.flush()
def getNextValue(s_item, i_seek):
    """Extract the next run of visible text from an HTML fragment.

    ``<em>`` and ``</em>`` markers are removed first.  Scanning then starts
    just after the first ``>`` found at or after ``i_seek`` (or at index 0
    when there is no such ``>``), skips complete tags and any characters
    with a code below 0x21 (space and control characters), and stops at the
    first remaining character.  The text runs from there up to the next
    ``<`` or to the end of the fragment.

    Args:
        s_item: HTML fragment to scan.
        i_seek: Index at which to start looking for markup.

    Returns:
        A ``(text, offset)`` tuple.  ``text`` is the stripped text and
        ``offset`` is the index of the ``<`` that terminates it, or -1 when
        the text runs to the end of the fragment.  ``('', -1)`` is returned
        when the fragment has no markup from ``i_seek`` on, when a tag is
        left unclosed, or when nothing but tags and whitespace remains.
        Offsets refer to the fragment *after* the ``<em>`` markers were
        removed.
    """
    s_item = s_item.replace('<em>', '').replace('</em>', '')
    i_sl = s_item.find('<', i_seek)
    i_sr = s_item.find('>', i_seek)
    if i_sl == -1 and i_sr == -1:
        return ('', -1)

    itemlen = len(s_item)
    # Whether '<' or '>' comes first, the text search begins right after the
    # first '>'.  A missing '>' yields -1 + 1 == 0, i.e. the fragment start.
    i_pos = i_sr + 1
    while i_pos < itemlen:
        ch = s_item[i_pos]
        if ch == '<':
            # Jump over the whole tag.
            i_close = s_item.find('>', i_pos)
            if i_close == -1:
                return ('', -1)
            i_pos = i_close + 1
        elif ord(ch) < 0x21:
            # Space or control character: not part of the value.
            i_pos += 1
        else:
            break
    else:
        # Ran off the end without meeting any text.
        return ('', -1)

    i_end = s_item.find('<', i_pos)
    if i_end == -1:
        return (s_item[i_pos:].strip(), i_end)
    return (s_item[i_pos:i_end].strip(), i_end)
def main():
# 输入字符串
while(True):
s_input = raw_input(u'\r输入歌名:'.encode('gbk')).decode('gbk').encode('utf-8')
if len(s_input) == 0:
print 'no input! input again!'
elif s_input == '**':
return 2 # 退出
else:
break
s_path = 'D:\\' + s_input + '.mp3' # 文件保存目录路径
s_output = ''
# 解析歌名字符串
for ch in s_input:
if ord(ch) > 127:
s_output += (hex(ord(ch)).replace('0x', '%').upper())
else:
s_output += ch
s_output = '/search?key=' + s_output
# 获得歌曲搜索结果列表HTML代码
# GET /search?key=...
# host: music.baidu.com
# port: 80
cls_http = httplib.HTTPConnection('music.baidu.com:80')
cls_http.request('GET', s_output)
cls_resp = cls_http.getresponse()
if cls_resp.status != 200:
print 'No response!'
cls_http.close()
return 1
s_web = cls_resp.read()
cls_http.close()
# 解析
lt_info = []
i_start = s_web.find('"index-num index-hook"')
while i_start != -1:
i_end = s_web.find('"index-num index-hook"', i_start + 1)
s_item = ''
if i_end != -1:
s_item = s_web[i_start : i_end]
else:
s_item = s_web[i_start:]
i_sk = s_item.find('"song-info "')
if i_sk == -1: # 非百度的资源,无法下载,这里选择忽略
i_start = i_end
continue
i_title = s_item.find('class="song-title"')
i_author = s_item.find('class="author_list"')
i_album = s_item.find('class="album-title"')
i_fun_icn= s_item.find('class="fun-icon"')
i_sk = s_item.find('/song/', i_sk) + 6
s_id = s_item[i_sk : s_item.find('"', i_sk)]
s_nm, i_sk = getNextValue(s_item[i_title : i_author], 0)
s_au, i_sk = getNextValue(s_item[i_author: i_album] , 0)
s_al, i_sk = getNextValue(s_item[i_album : i_fun_icn], 0)
dt_item = {}
dt_item['ID'] = s_id
dt_item['name'] = s_nm
dt_item['author'] = s_au
dt_item['album'] = s_al
lt_info.append(dt_item)
i_start = i_end
# 显示搜索结果列表
if len(lt_info) == 0:
print 'no search result!'
return 1
i_index = 1
for itm in lt_info:
print '-' * 38, '%2d' % i_index, '-' * 38,
print '\rID:%s\tName:%s\tAuthor:%s\tAlbum:%s' % (itm['ID'],
itm['name'].decode('utf-8').encode('gbk'),
itm['author'].decode('utf-8').encode('gbk'),
itm['album'].decode('utf-8').encode('gbk'))
i_index += 1
print '*' * 80,
# 选择歌曲(需要输入数字)
while(True):
s_input = raw_input(u'\r选择:'.encode('gbk')).decode('gbk').encode('utf-8')
try:
i_index = int(s_input) - 1
except ValueError:
print 'input error! input again!'
if i_index >= len(lt_info) or i_index < 0:
print 'out of range! input again!'
else:
break
s_id = lt_info[i_index]['ID']
# 获得下载页面的HMTL代码
# GET /song/.../download
# host: music.baidu.com
# port: 80
cls_http = httplib.HTTPConnection('music.baidu.com:80')
cls_http.request('GET', '/song/' + s_id + '/download')
cls_resp = cls_http.getresponse()
if cls_resp.status != 200:
print 'No response!'
cls_http.close()
return 1
s_web = cls_resp.read()
cls_http.close()
# 解析下载页面HTML代码
lt_down = []
i_start = s_web.find('data-data')
while i_start != -1:
i_end = s_web.find('</li>', i_start)
s_item = s_web[i_start : i_end]
i_sk = s_item.find('bit') + len('bit')
s_br = s_item[i_sk : i_sk + 3]
i_sk = s_item.find('/data2/')
if i_sk == -1:
s_url = ''
else:
s_url = 'http://zhangmenshiting.baidu.com' + s_item[i_sk : s_item.find('"', i_sk)]
dt_item = {}
dt_item['bitrate'] = s_br
dt_item['url'] = s_url
dt_item['info'] = []
s_inf, i_sk = getNextValue(s_item, 0)
while s_inf != '' and i_sk != 0:
dt_item['info'].append(s_inf)
s_inf, i_sk = getNextValue(s_item, i_sk)
lt_down.append(dt_item)
i_start = s_web.find('data-data', i_end + len('</li>'))
# 显示
if len(lt_down) == 0:
print 'no download!'
return 1
i_index = 1
for itm in lt_down:
print '-' * 38, '%2d' % i_index, '-' * 38,
print '\rBitrate:%s\tInformation:%s\nURL:%s' % (itm['bitrate'],
' '.join(itm['info']).decode('utf-8').encode('gbk'),
itm['url'])
i_index += 1
print '*' * 80,
while(True):
s_input = raw_input(u'\r选择:'.encode('gbk')).decode('gbk').encode('utf-8')
try:
i_index = int(s_input) - 1
except ValueError:
print 'input error! input again!'
if i_index >= len(lt_down) or i_index < 0:
print 'out of range!'
else:
break
# 下载文件
urllib.urlretrieve(lt_down[i_index]['url'], s_path.decode('utf-8'), reporthook = dlProgress)
print 'Success!'
return 0
if __name__ == '__main__':
while(True):
ret = main()
if ret == 2:
break
print '#' * 80,