Python_抓取下载音乐歌曲

抓取酷我音乐的流程

  1. 打开Chrome浏览器找到酷我音乐的首页

  2. 先按F12调出chrome浏览器的调试工具

  3. 搜索歌曲,并且查看Network(网络)选项卡的抓包情况

  4. 通过寻找发现发送响应的url地址
    在这里插入图片描述

  5. 打开文件并观察里面的数据 发现是一个带有json的数据网页,尝试打开这个json页面却发现请求错误404
    在这里插入图片描述

  6. 发现直接在浏览器中打开这个返回json数据的地址会被拒绝。经过搜索得知:有时候在浏览器里直接访问不了的接口,程序带上浏览器的请求头(例如下面代码中的 Cookie、csrf、Referer)之后却可以访问。于是用python试了一下,成功访问:在这里插入图片描述

  7. 获取请求后继续观察url地址,查找不同的歌曲,并且查看url地址中的参数是否发生变化

  8. 观察后得知 里面有2处地址发生变化,发生变化的位置已经用{}代替方便后续的参数传递
    在这里插入图片描述

  9. 观察刚刚请求到的json数据后发现,需要传递的参数都在json数据里面。先把请求得到的str类型的字符串使用json.loads()方法转换为python能够识别的内容(字典和列表),接下来就是按照字典取值的方法提取所需要的数据。

  10. 程序代码如下:

import requests
import json
import os

class My_Music():
    """Interactive downloader for songs found through the kuwo.cn search API.

    One instance represents one search: it holds the request headers, the
    search URL for the chosen keyword/page, and the lists filled from the
    search result (raw entries, artists, numbered titles and rids).

    NOTE(review): the Cookie / csrf values below are hard-coded; presumably
    they were copied from a browser session and may stop working once the
    server invalidates them - confirm before relying on them.
    """

    # Characters that are not allowed in Windows file names (this set also
    # covers the POSIX path separator); each is replaced by "_" when saving.
    _FILENAME_UNSAFE = dict.fromkeys(map(ord, '\\/:*?"<>|'), "_")

    def __init__(self, key_name=None, num=None):
        """Print the banner, collect the search parameters and build the URLs.

        Args:
            key_name: Song name to search for. When None the user is prompted.
            num: Result page number. When None the user is prompted.
        """
        # Local import: keeps this class independent of the file-level imports.
        from urllib.parse import quote

        print("*"*25+"音乐抓取器"+"*"*25)
        if key_name is None:
            key_name = input('*请输入你要查找的歌曲名称*:')
        if num is None:
            num = input('*请输入你要查看歌曲列表第几页*:')

        # Minimal headers, used by run() for the actual audio download.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
        }

        # Headers sent with the www.kuwo.cn API requests (search and song-URL lookup).
        self.headers2 = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": "_ga=GA1.2.1217877481.1602902461; h5Uuid=d48c2935f62740f1a5fb2a98c8a5c8-77; _gid=GA1.2.1687724501.1603190279; Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1602902462,1602923878,1603190279; uname3=%5EO%5E; t3kwid=225768453; userid=225768453; websid=1015091557; pic3=\"http://q.qlogo.cn/qqapp/100243533/90AF896B7270476F63C06DE71F6BFCA4/100\"; t3=qq; _gat=1; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1603190370; kw_token=PLSZIEEDJSK",
            "csrf": "PLSZIEEDJSK",
            "Host": "www.kuwo.cn",
            "Referer": "http://www.kuwo.cn/search/list?key=%E6%B8%B8%E5%B1%B1%E6%81%8B",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        }

        # Not referenced by any method of this class; kept so that code reading
        # the attribute keeps working.
        self.headers1 = {
            "Accept": "*/*",
            "Accept-Encoding": "identity;q=1, *;q=0",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": "_ga=GA1.2.1217877481.1602902461; h5Uuid=d48c2935f62740f1a5fb2a98c8a5c8-77; _gid=GA1.2.1687724501.1603190279; uname3=%5EO%5E; t3kwid=225768453; userid=225768453; websid=1015091557; pic3=\"http://q.qlogo.cn/qqapp/100243533/90AF896B7270476F63C06DE71F6BFCA4/100\"; t3=qq; Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1602902462,1602923878,1603190279,1603192339; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1603192422",
            "Host": "ey-sycdn.kuwo.cn",
            "If-None-Match": "\"5f3faedb-350ce7\"",
            "Range": "bytes=3145728-3476710",
            "Referer": "https://ey-sycdn.kuwo.cn/15a6818d99191a29bc2ced51da173052/5f8ec8bc/resource/n3/65/71/2640215188.mp3",
            "Sec-Fetch-Dest": "video",
            "Sec-Fetch-Mode": "no-cors",
            "Sec-Fetch-Site": "\"same-origin\"",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        }

        # Song-URL lookup; the {} placeholder receives the rid of the chosen song.
        self.url1 = "http://www.kuwo.cn/url?format=mp3&rid={}&response=url&type=convert_url3&br=128kmp3&from=web&t=1603196093062&httpsStatus=1&reqId=dbd3f971-12cd-11eb-984f-451e90d80fbc"

        # Search API. The keyword is percent-encoded so characters such as
        # "&", "#" or spaces cannot corrupt the query string.
        self.url2 = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord?key={}&pn={}&rn=30&httpsStatus=1&reqId=da11ad51-d211-11ea-b197-8bff3b9f83d2e'.format(
            quote(str(key_name), safe=""), str(num).strip())

        # Raw song entries from the search result
        self.music_list = []
        # Artist of every song
        self.all_singers = []
        # Numbered song titles ("<n>  <title>")
        self.names = []
        # rid of every song; the rid is the parameter url1 needs
        self.all_rid = []

    def get_response(self, url, headers, code=True, timeout=10):
        """Fetch *url* once and return its body.

        Args:
            url: Address to request.
            headers: Header dict sent with the request.
            code: Truthy -> decode the body and parse it as JSON;
                falsy -> return the raw bytes.
            timeout: Seconds passed to requests.get as its timeout.

        Returns:
            The parsed JSON value, or the response body as bytes.

        Raises:
            requests.exceptions.RequestException: on network failure, timeout
                or a non-success HTTP status.
            ValueError: if a JSON response was requested but the body cannot
                be decoded/parsed.
        """
        response = requests.get(url=url, headers=headers, timeout=timeout)
        # Fail loudly instead of saving/parsing an HTTP error page.
        response.raise_for_status()
        if code:
            return json.loads(response.content.decode())
        return response.content

    def json_html(self, jos_html, code=True):
        """Dump fetched data into the working directory for inspection.

        Args:
            jos_html: JSON-serialisable object (code truthy) or bytes (code falsy).
            code: Truthy -> write pretty-printed JSON to "josn文件.json";
                falsy -> write the bytes to "歌曲.mp3".
        """
        if code:
            # Explicit UTF-8: the dump keeps non-ASCII text (ensure_ascii=False),
            # which the platform default encoding may not be able to represent.
            with open("josn文件.json", "w", encoding="utf-8") as f:
                f.write(json.dumps(jos_html, ensure_ascii=False, indent=4))
        else:
            with open("歌曲.mp3", "wb") as f:
                f.write(jos_html)

    def music_lists(self, music_list):
        """Append the songs of one result page to the instance lists.

        Args:
            music_list: Iterable of dicts providing the keys "artist",
                "name" and "musicrid".

        Returns:
            Tuple (self.music_list, self.all_singers, self.names, self.all_rid).
        """
        # Continue numbering after the songs already stored, so the displayed
        # number always equals the 1-based position in the lists.
        for position, music in enumerate(music_list, start=len(self.names) + 1):
            self.music_list.append(music)
            self.all_singers.append(music["artist"])
            self.names.append(str(position)+"  "+music["name"])
            # Only the part after the last "_" of musicrid is stored as rid.
            self.all_rid.append(music["musicrid"].split("_")[-1])

        return self.music_list, self.all_singers, self.names, self.all_rid

    def get_song_url(self, name, singers, all_rid):
        """Show the song menu, read the user's choice and look up its URL.

        Args:
            name: Numbered song titles.
            singers: Artists, parallel to *name*.
            all_rid: rids, parallel to *name*.

        Returns:
            Tuple (parsed JSON answer of the url1 lookup, 0-based chosen index).

        Raises:
            ValueError: if the entered text is not an integer.
            IndexError: if the entered number is outside 1..len(all_rid).
        """
        # Build the menu directly from the values so that punctuation inside
        # titles or artist names is shown unchanged.
        rows = [" {} —————— {} ".format(title, artist)
                for title, artist in zip(name, singers)]
        print("\n" + "\n".join(rows) + "\n")
        order = int(input("*请输入歌曲前的序号*:"))
        # Reject 0 and negative numbers explicitly; as list indexes they would
        # silently select a song counted from the end.
        if not 1 <= order <= len(all_rid):
            raise IndexError(
                "song number {} is outside 1..{}".format(order, len(all_rid)))
        order -= 1
        url = self.url1.format(all_rid[order])
        music_url = self.get_response(url, self.headers2)
        return music_url, order

    def music_song(self, song, file_names, singers):
        """Save *song* as "歌曲库/<title>---<artist>.mp3".

        Args:
            song: Audio data as bytes.
            file_names: Song title, optionally prefixed with "<n>  ".
            singers: Artist name.
        """
        self.file()
        # Drop only the leading "<n>  " prefix; a title that itself contains
        # two spaces is kept whole.
        title = str(file_names).split("  ", 1)[-1]
        base_name = (title + "---" + str(singers)).translate(self._FILENAME_UNSAFE)
        file_name = os.path.join("歌曲库", base_name + ".mp3")
        with open(file_name, "wb") as f:
            f.write(song)
        print(base_name + ".mp3", "下载保存成功!")

    def file(self):
        """Create the "歌曲库" directory if it does not exist yet."""
        path = "歌曲库"
        try:
            os.mkdir(path)
        except FileExistsError:
            return
        print(path.split("/")[-1], ":创建成功")

    def run(self):
        """Search, list the results, let the user pick a song and download it."""
        music_json = self.get_response(self.url2, self.headers2)
        # Keep a copy of the search answer on disk for inspection.
        self.json_html(music_json)
        music_list = music_json["data"]["list"]
        print("当前页面有", len(music_list), "支歌曲:")
        if not music_list:
            # Nothing to choose from; asking for a number would only fail.
            return
        _, singers, name, all_rid = self.music_lists(music_list)
        music_url, orid = self.get_song_url(name, singers, all_rid)
        song_bytes = self.get_response(music_url["url"], self.headers, code=False)
        self.music_song(song_bytes, name[orid], singers[orid])


if __name__ == '__main__':
    # Offer one search/download round after another until the user quits
    # or the network connection is lost.
    while True:
        try:
            music = My_Music()
            music.run()

        except (KeyboardInterrupt, EOFError):
            # Ctrl+C or a closed stdin. EOFError must leave the loop: every
            # further input() would raise it again and the loop would never end.
            print("程序退出")
            break

        except json.JSONDecodeError as err:
            # Must precede ValueError (its base class); otherwise a malformed
            # server answer would be reported as a bad song number.
            print("歌曲下载错误,服务器返回的数据无法解析:", err)

        except ValueError:
            print("歌曲下载错误,歌曲序号请输入整数!")

        except requests.exceptions.ConnectionError:
            print("网络连接错误,请检查网络连接!")
            print("程序退出")
            break

        except requests.exceptions.Timeout:
            print("网络连接超时,请重启检测网络通畅性!")

        except Exception as err:
            # Last-resort boundary: show what actually went wrong instead of
            # blaming the network, then let the user try again.
            print("歌曲下载错误:", repr(err))

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值