python爬取酷狗音乐_Python实例---爬取酷狗音乐

项目一:获取酷狗TOP 100

排名

519608-20180909165202061-2130518844.png

文件&&歌手

519608-20180909165204264-1174783563.png

时长

519608-20180909165206258-1382435735.png

效果:

519608-20180909165207964-1296380096.png

附源码:

import time

import json

from bs4 import BeautifulSoup

import requests

class Kugou(object):
    """Scrape one page of a Kugou ranking chart, print it and log it to a file."""

    # Text file that getInfo appends one line per chart entry to.
    OUTPUT_FILE = 'hhh.txt'

    def __init__(self):
        # Browser-like User-Agent header sent with every page request.
        self.header = {
            "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0'
        }

    @staticmethod
    def _split_title(text):
        """Split chart link text of the form "singer - title".

        The spaced separator " - " is tried first so that a hyphen inside a
        singer name or song title is kept; a bare "-" is the fallback.

        Args:
            text: Visible text of one chart entry link.

        Returns:
            A ``(singer, title)`` tuple of stripped strings. When no hyphen
            is present the whole text is returned as the title and the
            singer is an empty string.
        """
        text = text.strip()
        for separator in (' - ', '-'):
            singer, found, title = text.partition(separator)
            if found:
                return singer.strip(), title.strip()
        return '', text

    def getInfo(self, url):
        """Download one chart page, print every entry and append it to OUTPUT_FILE.

        Args:
            url: Address of the chart page to fetch.
        """
        html = requests.get(url, headers=self.header)
        soup = BeautifulSoup(html.text, 'html.parser')
        ranks = soup.select('.pc_temp_num')
        titles = soup.select('.pc_temp_songlist > ul > li > a')  # walk down tag by tag
        times = soup.select('.pc_temp_time')

        lines = []
        for rank, title, songTime in zip(ranks, titles, times):
            singer, song = self._split_title(title.get_text())
            data = {
                # get_text() drops the surrounding HTML tags of each element.
                'rank': rank.get_text().strip(),
                'title': song,
                'singer': singer,
                'songTime': songTime.get_text().strip()
            }
            print('rank:%2s\t' % data['rank'], 'title:%2s\t' % data['title'], 'singer:%2s\t' % data['singer'], 'songTime:%2s\t' % data['songTime'])
            lines.append(str(data) + '\n')

        # Open the file once per page instead of once per row; skip it
        # entirely when the page produced no rows.
        if lines:
            with open(self.OUTPUT_FILE, 'a', encoding='utf8') as f:
                f.writelines(lines)

if __name__ == '__main__':
    # Fetch chart pages one after another, pausing a second between requests.
    # NOTE(review): range(30) starts at page 0, whereas the walkthrough
    # further down the article builds 1-based page URLs - confirm that
    # page 0 is intended.
    kugou = Kugou()
    for page in range(30):
        kugou.getInfo('http://www.kugou.com/yy/rank/home/{}-8888.html'.format(page))
        time.sleep(1)

部分代码解析--------------------------------------------------------------------

# Build the chart URLs for pages 1 through 4 and print each of them.
urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i) for i in range(1, 5)]
for url in urls:
    print(url)

结果打印:

http://www.kugou.com/yy/rank/home/1-8888.html

http://www.kugou.com/yy/rank/home/2-8888.html

http://www.kugou.com/yy/rank/home/3-8888.html

http://www.kugou.com/yy/rank/home/4-8888.html

--------------------------------------------------------------------

for rank, title, songTime in zip(ranks, titles, times):
    data = {
        # Printing `rank` as-is would include its HTML tags, so take the text.
        'rank': rank.get_text().strip(),
        # The link text reads "singer - title": the singer is the part
        # before the hyphen and the song title the part after it.
        'title': title.get_text().split('-')[1].strip(),
        'singer': title.get_text().split('-')[0].strip(),
        'songTime': songTime.get_text().strip()
    }
    print(data['rank'])
    print(data['title'])
    print(data['singer'])
    print(data['songTime'])

结果打印:

1

飞驰于你

许嵩

4:04

--------------------------------------------------------------------

# Store the selected elements themselves (no get_text()); per the original
# note, printing `rank` whole shows it together with its HTML tags.
for rank, title, songTime in zip(ranks, titles, times):
    data = dict(rank=rank, title=title, songTime=songTime)
    for key in ('rank', 'title', 'songTime'):
        print(data[key])

结果打印:

1

许嵩 - 飞驰于你

4:04

项目二:搜索曲目获取URL

根据关键字搜索后的结果:

519608-20180909173212661-518120689.png

# encoding=utf-8

# Time : 2018/4/27

# Email : z2615@163.com

# Software: PyCharm

# Language: Python 3

import requests

import json

class KgDownLoader(object):
    """Search Kugou for songs by keyword and look up their MP3 play URLs."""

    def __init__(self):
        # JSONP search endpoint; `{}` is replaced by the search keyword.
        self.search_url = 'http://songsearch.kugou.com/song_search_v2?callback=jQuery191034642999175022426_1489023388639&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1489023388641'
        # Play-data endpoint; `{}` is replaced by a song's file hash.
        self.play_url = 'http://www.kugou.com/yy/index.php?r=play/getdata&hash={}'
        # Most recent record produced by get_search_data / get_mp3_url.
        self.song_info = {
            '歌名': None,
            '演唱者': None,
            '专辑': None,
            'filehash': None,
            'mp3url': None
        }

    @staticmethod
    def _parse_jsonp(text):
        """Decode a JSONP response ``callback({...})`` into a Python object.

        Only the outermost pair of parentheses is removed, so parentheses
        inside the payload (e.g. a song called "X (Live)") are preserved.
        Text that is already plain JSON is decoded unchanged.
        """
        text = text.strip()
        if not text.startswith(('{', '[')):
            start = text.find('(')
            end = text.rfind(')')
            if start != -1 and end > start:
                text = text[start + 1:end]
        return json.loads(text)

    @staticmethod
    def _strip_highlight(text):
        """Remove the ``<em>``/``</em>`` tags wrapped around matched keywords.

        The search URL asks for ``tag=em``, so matched parts of each field
        come back wrapped in these tags.
        """
        return text.replace('<em>', '').replace('</em>', '')

    def get_search_data(self, keys):
        """Search for *keys* and yield one info dict per result.

        Args:
            keys: Search keyword typed by the user.

        Yields:
            A fresh dict with the keys of ``self.song_info``; ``'mp3url'``
            is always ``None`` at this stage. ``self.song_info`` itself is
            updated to the latest hit so get_mp3_url can complete it.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        # Percent-encode the keyword so characters such as '&' or '#'
        # cannot break the query string.
        search_file = requests.get(self.search_url.format(quote(keys, safe='')))
        views = self._parse_jsonp(search_file.content.decode())
        for view in views['data']['lists']:
            song_name = self._strip_highlight(view['SongName'])
            album_name = self._strip_highlight(view['AlbumName'])
            sing_name = self._strip_highlight(view['SingerName'])
            file_hash = view['FileHash']
            new_info = {
                '歌名': song_name,
                '演唱者': sing_name,
                '专辑': album_name if album_name else None,
                'filehash': file_hash,
                'mp3url': None
            }
            self.song_info.update(new_info)
            # Yield a copy so results collected by the caller are not all
            # aliases of one dict that keeps being overwritten.
            yield dict(self.song_info)

    def get_mp3_url(self, filehash):
        """Look up the play URL for *filehash* and yield the completed record.

        Args:
            filehash: The ``'filehash'`` value of a search result.

        Yields:
            Exactly one dict: a copy of ``self.song_info`` with ``'mp3url'``
            filled in.
        """
        mp3_file = requests.get(self.play_url.format(filehash)).content.decode()
        mp3_json = json.loads(mp3_file)
        real_url = mp3_json['data']['play_url']
        self.song_info['mp3url'] = real_url
        yield dict(self.song_info)

    def save_mp3(self, song_name, real_url):
        """Download *real_url* and write it to ``<song_name>.mp3``.

        Args:
            song_name: File name without the ``.mp3`` extension.
            real_url: Address of the audio file.
        """
        # Download first so a failed request does not leave an empty file.
        content = requests.get(real_url).content
        with open(song_name + ".mp3", "wb") as fp:
            fp.write(content)

if __name__ == '__main__':
    kg = KgDownLoader()
    # For every search hit, resolve its play URL and print the resulting record.
    for hit in kg.get_search_data(input('请输入歌名:')):
        for record in kg.get_mp3_url(hit['filehash']):
            print(record)

519608-20180909172608131-1573324889.png

项目三:搜索下载歌曲

代码仅供学习参考

from selenium import webdriver

import urllib.request

from bs4 import BeautifulSoup
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

def cbk(a, b, c):
    """Print download progress as a percentage (urlretrieve report hook).

    Args:
        a: Number of blocks transferred so far.
        b: Size of one block in bytes.
        c: Total size of the file; not positive when the size is unknown.
    """
    if c <= 0:
        # No usable total size: a percentage cannot be computed.
        return
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)


# Keyword to search for on Kugou.
input_string = input('>>>please input the search key:')

driver = webdriver.Chrome()
try:
    driver.get('http://www.kugou.com/')

    # Type the keyword into the home-page search box, then click the search icon.
    search_box = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[1]/input')
    search_box.send_keys(input_string)
    driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[1]/div/i').click()

    # Continue in the last window handle (the original looped over every
    # handle and therefore always ended up on the last one).
    driver.switch_to.window(driver.window_handles[-1])

    soup = BeautifulSoup(driver.page_source, 'lxml')
    PageAll = len(soup.select('ul.list_content.clearfix > li'))
    print(PageAll)

    # XPath of the link of the n-th entry in the result list (1-based).
    result_xpath = '/html/body/div[4]/div[1]/div[2]/ul[2]/li[%d]/div[1]/a'
    for i in range(1, PageAll + 1):
        j = driver.find_element(By.XPATH, result_xpath % i).get_attribute('title')
        print('%d.' % i + j)

    choice = input("请输入你要下载的歌曲(输入序号):")
    try:
        # Only a plain integer may be interpolated into the XPath.
        index = int(choice)
    except ValueError:
        raise SystemExit('not a valid song number: %r' % choice)

    link = driver.find_element(By.XPATH, result_xpath % index)
    title = link.get_attribute('title')

    # Move the mouse onto the chosen entry and click it.
    actions = ActionChains(driver)
    actions.move_to_element(link)
    actions.click(link)
    actions.perform()

    # Switch to the last window handle again after the click.
    driver.switch_to.window(driver.window_handles[-1])

    # The audio element's `src` attribute is used as the download address.
    Local = driver.find_element(By.XPATH, '//*[@id="myAudio"]').get_attribute('src')
    print(Local)

    # Reduce the title attribute to plain text before using it as a file name.
    name = BeautifulSoup(title, 'lxml').get_text()
    path = r'D:\%s.mp3' % name
    urllib.request.urlretrieve(Local, path, cbk)
    print('finish downloading %s.mp3' % name + '\n\n')
finally:
    # Always close the browser, even if a step above failed.
    driver.quit()

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值