python爬取酷狗音乐_Python实例---爬去酷狗音乐

最新推荐文章于 2024-05-26 20:02:29 发布

weixin_39999116

最新推荐文章于 2024-05-26 20:02:29 发布

阅读量1k

点赞数

文章标签： python爬取酷狗音乐

项目一：获取酷狗TOP 100

排名

文件&&歌手

时长

效果：

附源码： import time

import json

from bs4 import BeautifulSoup

import requests

class Kugou(object):

def __init__(self):

self.header = {

"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0'

}

def getInfo(self, url):

html = requests.get(url, headers=self.header)

soup = BeautifulSoup(html.text, 'html.parser')

# print(soup.prettify())

ranks = soup.select('.pc_temp_num')

titles = soup.select('.pc_temp_songlist > ul > li > a') # 层层标签查找

times = soup.select('.pc_temp_time')

for rank, title, songTime in zip(ranks, titles, times):

data = {

# rank 全打印就是带HTML标签的

'rank': rank.get_text().strip(),

'title': title.get_text().split('-')[1].strip(),

'singer': title.get_text().split('-')[0].strip(),

'songTime': songTime.get_text().strip()

}

s = str(data)

print('rank:%2s\t' % data['rank'], 'title:%2s\t' % data['title'], 'singer:%2s\t' %data['singer'], 'songTime:%2s\t' % data['songTime'])

with open('hhh.txt', 'a', encoding='utf8') as f:

f.writelines(s + '\n')

if __name__ == '__main__':

urls = [

'http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(30)

]

kugou = Kugou()

for url in urls:

kugou.getInfo(url)

time.sleep(1)

部分代码解析--------------------------------------------------------------------

urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(1, 5)]

for i in urls:

print(i)

结果打印：

http://www.kugou.com/yy/rank/home/1-8888.html

http://www.kugou.com/yy/rank/home/2-8888.html

http://www.kugou.com/yy/rank/home/3-8888.html

http://www.kugou.com/yy/rank/home/4-8888.html

--------------------------------------------------------------------

for rank, title, songTime in zip(ranks, titles, times):

data = {

# rank 全打印就是带HTML标签的

'rank': rank.get_text().strip(),

'title': title.get_text().split('-')[0].strip(),

'singer': title.get_text().split('-')[1].strip(),

'songTime': songTime.get_text()

}

print(data['rank'])

print(data['title'])

print(data['singer'])

print(data['songTime'])

结果打印：

飞驰于你

许嵩

4: 04

--------------------------------------------------------------------

for rank, title, songTime in zip(ranks, titles, times):

data = {

# rank 全打印就是带HTML标签的

'rank': rank,

'title': title,

'songTime': songTime

}

print(data['rank'])

print(data['title'])

print(data['songTime'])

结果打印：

许嵩 - 飞驰于你

4:04

项目二：搜索曲目获取URL

根据关键字搜索后的结果：

# encoding=utf-8

# Time : 2018/4/27

# Email : z2615@163.com

# Software: PyCharm

# Language: Python 3

import requests

import json

class KgDownLoader(object):

def __init__(self):

self.search_url = 'http://songsearch.kugou.com/song_search_v2?callback=jQuery191034642999175022426_1489023388639&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1489023388641'

# .format('园游会')

self.play_url = 'http://www.kugou.com/yy/index.php?r=play/getdata&hash={}'

self.song_info = {

'歌名': None,

'演唱者': None,

'专辑': None,

'filehash': None,

'mp3url': None

}

def get_search_data(self, keys):

search_file = requests.get(self.search_url.format(keys))

search_html = search_file.content.decode().replace(')', '').replace(

'jQuery191034642999175022426_1489023388639(', '')

views = json.loads(search_html)

for view in views['data']['lists']:

song_name = view['SongName'].replace('', '').replace('', '')

album_name = view['AlbumName'].replace('', '').replace('', '')

sing_name = view['SingerName'].replace('', '').replace('', '')

file_hash = view['FileHash']

new_info = {

'歌名': song_name,

'演唱者': sing_name,

'专辑': album_name if album_name else None,

'filehash': file_hash,

'mp3url': None

}

self.song_info.update(new_info)

yield self.song_info

def get_mp3_url(self, filehash):

mp3_file = requests.get(self.play_url.format(filehash)).content.decode()

mp3_json = json.loads(mp3_file)

real_url = mp3_json['data']['play_url']

self.song_info['mp3url'] = real_url

yield self.song_info

def save_mp3(self, song_name, real_url):

with open(song_name + ".mp3", "wb")as fp:

fp.write(requests.get(real_url).content)

if __name__ == '__main__':

kg = KgDownLoader()

mp3_info = kg.get_search_data(input('请输入歌名：'))

for x in mp3_info:

mp3info = kg.get_mp3_url(x['filehash'])

for i in mp3info:

print(i)

项目三：搜索下载歌曲

代码仅供学习参考from selenium import webdriver

from bs4 import BeautifulSoup

import urllib.request

from selenium.webdriver.common.action_chains import ActionChains

input_string = input('>>>please input the search key:')

#input_string="你就不要想起我"

driver = webdriver.Chrome()

driver.get('http://www.kugou.com/')

a=driver.find_element_by_xpath('/html/body/div[1]/div[1]/div[1]/div[1]/input') #输入搜索内容/html/body/div[1]/div[1]/div[1]/div[1]/input

a.send_keys(input_string)

driver.find_element_by_xpath('/html/body/div[1]/div[1]/div[1]/div[1]/div/i').click() #点击搜索/html/body/div[1]/div[1]/div[1]/div[1]/div/i

for handle in driver.window_handles:#方法二，始终获得当前最后的窗口，所以多要多次使用

driver.switch_to_window(handle)

#result_url = driver.current_url

#driver = webdriver.Firefox()

#driver.maximize_window()

#driver.get(result_url)

#j=driver.find_element_by_xpath('/html/body/div[4]/div[1]/div[2]/ul[2]/li[2]/div[1]/a').get_attribute('title')测试

#print(j)

soup = BeautifulSoup(driver.page_source,'lxml')

PageAll = len(soup.select('ul.list_content.clearfix > li'))

print(PageAll)

for i in range(1,PageAll+1):

j=driver.find_element_by_xpath('/html/body/div[4]/div[1]/div[2]/ul[2]/li[%d]/div[1]/a'%i).get_attribute('title')

print('%d.'%i + j)

choice=input("请输入你要下载的歌曲（输入序号）：")

#global mname

#mname=driver.find_element_by_xpath('/html/body/div[4]/div[1]/div[2]/ul[2]/li[%d]/div[1]/a'%choice).get_attribute('title')#歌曲名

a=driver.find_element_by_xpath('/html/body/div[4]/div[1]/div[2]/ul[2]/li[%s]/div[1]/a'%choice)#定位

b=driver.find_element_by_xpath('/html/body/div[4]/div[1]/div[2]/ul[2]/li[%s]/div[1]/a'%choice).get_attribute('title')

actions=ActionChains(driver)#selenium中定义的一个类

actions.move_to_element(a)#将鼠标移动到指定位置

actions.click(a)#点击

actions.perform()

#wait(driver)?

#driver = webdriver.Firefox()

#driver.maximize_window()

#driver.get(result_url)

#windows = driver.window_handles

#driver.switch_to.window(windows[-1])

#handles = driver.window_handles

for handle in driver.window_handles:#方法二，始终获得当前最后的窗口，所以多要多次使用

driver.switch_to_window(handle)

Local=driver.find_element_by_xpath('//*[@id="myAudio"]').get_attribute('src')

print(driver.find_element_by_xpath('//*[@id="myAudio"]').get_attribute('src'))

def cbk(a, b, c):

per = 100.0 * a * b / c

if per > 100:

per = 100

print('%.2f%%' % per)

soup=BeautifulSoup(b)

name=soup.get_text()

path='D:\%s.mp3'%name

urllib.request.urlretrieve(Local, path, cbk)

print('finish downloading %s.mp3' % name + '\n\n')

weixin_39999116

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
python爬取酷狗音乐_Python实例---爬去酷狗音乐

项目一：获取酷狗TOP 100排名文件&&歌手时长效果：附源码： import timeimport jsonfrom bs4 import BeautifulSoupimport requestsclass Kugou(object):def __init__(self):self.header = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0...
复制链接

扫一扫