python 爬虫批量下载全民K歌音乐

标签:爬虫

网址示例: https://node.kg.qq.com/personal?uid=639e9983222a338a

直接上源码:

import requests

import time

import re

import json

import pprint

import math

import os

# Shared HTTP request headers: a desktop-browser User-Agent so the site
# serves normal pages instead of rejecting the scraper.
header={
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0'
}

# Accumulated download targets; each entry is a dict with keys
# "name", "url", and "type" (".mp3" or ".mp4"). Filled by Parse_Song_Info.
SongList=[]

# Play page for a single song (queried with the share id as parameter "s").
song_baseurl="http://node.kg.qq.com/play"

# JSONP endpoint that lists a user's songs page by page.
album_baseurl="http://node.kg.qq.com/cgi/fcgi-bin/kg_ugc_get_homepage"

def Down(url_file, filePath, FileDir):
    """Download url_file into FileDir/filePath, streaming with a progress line.

    Skips the download when the target file already exists (returns 0).
    On any error the partially written file is removed so a re-run retries it.
    """
    if not os.path.isdir(FileDir):
        os.makedirs(FileDir)
    target = FileDir + "/" + filePath
    if os.path.isfile(target):
        print(filePath + " --已存在")
        return 0
    try:
        r = requests.get(url_file, stream=True)
        with open(target, "wb") as f:
            size = int(r.headers['content-length'])
            # BUG FIX: size is an int after int(...); the original concatenated
            # str + int here, raising TypeError inside this try block, so every
            # download "failed" and was deleted by the except handler.
            title = " 当前下载-" + filePath + " 文件大小:" + str(size) + "字节"
            print('\033[0;31m' + title + "\033[0m")
            CurTotal = 0
            # Stream in 512 KiB chunks so large files are never fully in memory.
            for chunk in r.iter_content(chunk_size=512 * 1024):
                if chunk:
                    f.write(chunk)
                    CurTotal += len(chunk)
                    print("\r" + filePath + "--下载进度:" + '%3s' % (str(CurTotal * 100 // size)) + "%", end='')
            print()
        r.close()
    except Exception as e:
        print(filePath + " 下载出错!" + " 错误信息" + str(e.args))
        # Remove the incomplete file so the existence check above
        # does not skip it on the next run.
        if os.path.isfile(target):
            os.remove(target)

def GetData(data, url):
    """GET `url` with query params `data` and the shared headers; return the body decoded as UTF-8."""
    resp = requests.get(url, params=data, headers=header)
    return resp.content.decode("utf-8")

def Parse_Song_Info(content):
    """Extract one song's name and playback URL from a play-page HTML string.

    The page embeds its data as ``window.__DATA__ = {...}; ``. The parsed
    entry (name/url/type) is appended to the global SongList. The audio URL
    is preferred; when it is empty the video URL is used instead.
    """
    matches = re.findall(r'window.__DATA__ = (.*?); ', content)
    if not matches:
        print("没有爬取到")
        return
    detail = json.loads(matches[0])['detail']
    entry = {"name": detail['song_name']}
    if detail['playurl']:
        print(detail['song_name'] + " 音乐:" + detail['playurl'])
        entry["url"] = detail['playurl']
        entry["type"] = ".mp3"
    else:
        print(detail['song_name'] + " 视频:" + detail['playurl_video'])
        entry["url"] = detail['playurl_video']
        entry["type"] = ".mp4"
    SongList.append(entry)

def GetSongsByIndex(uid, Is_Parse, page):
    """Query one page of a user's song list from the homepage JSONP endpoint.

    When Is_Parse is True, sleep 1s (politeness delay) and fetch+parse every
    song on the page into SongList. When False, just print and return the
    user's total song count. Returns 0 when the response cannot be parsed.
    """
    params = {
        'jsonpCallback': 'callback_0',
        'g_tk': '5381',
        'outCharset': 'utf-8',
        'format': 'jsonp',
        'type': 'get_ugc',
        'start': str(page),
        'num': '8',  # page size the site uses
        'touin': '',
        'share_uid': uid,
        'g_tk_openkey': '5381',
        '_': str(int(time.time() * 1000)),  # cache-busting timestamp
    }
    response = requests.get(album_baseurl, params=params, headers=header)
    text = response.content.decode("utf-8")
    # Strip the JSONP wrapper: callback_N( {...} )
    payload = re.findall(r'callback_\d\((.*)\)', text)
    if not payload:
        print("没有爬取到")
        return 0
    data = json.loads(payload[0])
    count = data['data']['ugc_total_count']
    if Is_Parse:
        time.sleep(1)
        for item in data['data']['ugclist']:
            print(item['title'] + " -- " + item['shareid'])
            Parse_Song_Info(GetData({"s": item['shareid']}, song_baseurl))
    else:
        pprint.pprint("共计:" + str(count))
        return count

def Run(uid):
    """Crawl every song page of the user `uid` (8 songs per page) into SongList."""
    total = GetSongsByIndex(uid, False, 1)
    if total == 0:
        print("该用户没有歌曲")
        return
    last_page = math.ceil(total / 8) + 1
    for page in range(1, last_page):
        GetSongsByIndex(uid, True, page)

if __name__ == "__main__":
    # Replace the uid below with the one at the end of the singer's homepage
    # URL; "小小" is the download folder and can be customized.
    Run('639e9983222a338a')
    for song in SongList:
        Down(song["url"], song["name"] + song["type"], "小小")

将 Run() 括号里的字符串替换为歌手主页链接末尾的 uid

“小小”为下载文件保存的文件夹名称,可自行修改

标签:爬虫

来源: https://www.cnblogs.com/yuanzessrs/p/10247347.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值