【爬取音乐,并将音乐信息储存到数据库中】

确定音乐网站的URL并分析网站

请添加图片描述

分析二级页面

请添加图片描述
请添加图片描述

创建数据库

# Open a connection to the MySQL database `spider` on host `master`.
conn = pymysql.connect(host='master', user='root', password='123456', port=3306, db='spider')
# Create the cursor used to run SQL statements on that connection.
cur = conn.cursor()
# DDL for the `music` table: an auto-increment integer primary key `id`
# plus five VARCHAR(255) text columns. This excerpt only builds the SQL
# string; it does not execute it.
# NOTE(review): the column name `signer` looks like a typo for `singer`.
sql_createTb = """CREATE TABLE music (
                 id INT NOT NULL AUTO_INCREMENT,
                 title VARCHAR(255),
                 signer  VARCHAR(255),
                 zuoci VARCHAR(255),
                 zuoqu VARCHAR(255),
                 album VARCHAR(255),
                 PRIMARY KEY(id))
                 """

使用XPath解析,进行多层爬取

# Announce the start of the chart crawl.
print("开始爬取欧美音乐榜单")
# Chart page to load.
# NOTE(review): the host in this excerpt looks like a placeholder.
url = 'https://music.xxxxxxx.cn/v3/music/top/eur_usa'
# Start a Chrome session and navigate to the chart page.
driver_chom = webdriver.Chrome()
driver_chom.get(url)
# Use XPath to collect the chart entries: every direct <div> child of the
# element whose id is "js_songlist".
music_list = driver_chom.find_elements(By.XPATH, '//div[@id="js_songlist"]/div')
print(music_list)

# For each chart entry, follow the link to the song's detail page and read
# the title, singer and related fields from it.
for url in music_list:
    # The detail link is the <a> under the entry's third <div>.
    detail_url = url.find_element(By.XPATH,'div[3]/span/a').get_attribute('href')
    print(detail_url)
    # A new Edge session is started for every entry to load the detail page.
    driver_edge = webdriver.Edge()
    driver_edge.get(detail_url)
    # Fixed 6-second pause before reading the page.
    time.sleep(6)
    try:
        # Song title: the <h2> inside div.info_contain.
        title = driver_edge.find_element(By.XPATH,"//div[@class='info_contain']/h2").text
        print(title)
        # Singer: the <a> inside div.info_singer.
        singer = driver_edge.find_element(By.XPATH, "//div[@class='info_singer']/a").text
        print(singer)
        # The next three values come from the 1st, 2nd and 3rd <p> of div.info_about.
        zuoci = driver_edge.find_element(By.XPATH,"//div[@class='info_about']/p[1]/span").text
        print(zuoci)
        zuoqu = driver_edge.find_element(By.XPATH,"//div[@class='info_about']/p[2]/span").text
        print(zuoqu)
        album = driver_edge.find_element(By.XPATH, "//div[@class='info_about']/p[3]/span/a").text
        print(album)

保存信息

# Write the song's information into the database
        print("将歌曲的信息写入到数据库中!")
        # 0 is passed as the id value of the inserted row.
        number = 0
        # NOTE(review): the scraped text is interpolated straight into the SQL
        # string, so a value containing a single quote breaks the statement.
        insert_sql = f"insert into music() values({number},'{title}','{singer}','{zuoci}','{zuoqu}','{album}')"
        try:
            cur.execute(insert_sql)
        except Exception as e:
            # Roll back the transaction
            conn.rollback()
        # commit() and the message below run whether or not the insert succeeded.
        conn.commit()
        print("写入完成!")
        # Database write is done; now save the song's lyrics to a text file
        file = open(f'./output/歌词信息/{title}_{singer}.txt', 'w',encoding='utf-8')
        try:
            # Write the text of every matching <p> element, one per line.
            geci = driver_edge.find_elements(By.XPATH,"/html/body/div[3]/div/div/div/p")
            for i in geci:
                file.write(i.text+'\n')
        except Exception as e:
            # Fallback: look up a single element with the same XPath and write its text.
            geci = driver_edge.find_element(By.XPATH, "/html/body/div[3]/div/div/div/p")
            file.write(geci.text+'\n')
        # Close the lyrics file
        file.close()

完整代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/12/7 19:32
import os
import time

import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By

# Scrape the Migu "Europe & America" chart, store each song's metadata in
# MySQL and save its lyrics to a text file.
#
# Flow:
#   1. connect to MySQL and make sure the `music` table exists;
#   2. load the chart page in Chrome and collect every song's detail URL;
#   3. for each detail URL, open the page in Edge, read the song fields,
#      insert one row into `music`, and write the lyrics to
#      ./output/歌词信息/<title>_<singer>.txt.

# Open the database connection used for the whole run.
# NOTE(review): credentials are hard-coded; move them to configuration
# before running this anywhere but a local sandbox.
conn = pymysql.connect(host='master', user='root', password='123456', port=3306, db='spider')
# Cursor used for every statement below.
cur = conn.cursor()
# IF NOT EXISTS lets the script be re-run without the CREATE failing.
# The column is spelled `signer` (sic); the name is kept so tables created
# by earlier runs remain compatible.
sql_createTb = """CREATE TABLE IF NOT EXISTS music (
                 id INT NOT NULL AUTO_INCREMENT,
                 title VARCHAR(255),
                 signer  VARCHAR(255),
                 zuoci VARCHAR(255),
                 zuoqu VARCHAR(255),
                 album VARCHAR(255),
                 PRIMARY KEY(id))
                 """
try:
    cur.execute(sql_createTb)
except Exception as exc:
    # Best effort: report the problem and roll back, but keep going.
    conn.rollback()
    print(f"数据库建立失败: {exc}")
else:
    conn.commit()
    print("数据库建立完毕!")

# Parameterized INSERT: the driver escapes the values, so quotes in scraped
# text (e.g. "Don't Stop") can neither break nor inject into the statement.
# `id` is omitted so AUTO_INCREMENT assigns it.
insert_sql = ("INSERT INTO music (title, signer, zuoci, zuoqu, album) "
              "VALUES (%s, %s, %s, %s, %s)")
# Directory that receives one lyrics file per song.
lyrics_dir = './output/歌词信息'
# Characters that are not safe inside a file name are replaced with "_".
unsafe_filename_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Step 1: load the chart page and collect the detail-page links.
print("开始爬取欧美音乐榜单")
url = 'https://music.migu.cn/v3/music/top/eur_usa'
driver_chom = webdriver.Chrome()
try:
    driver_chom.get(url)
    # Each direct <div> child of #js_songlist is one chart entry.
    music_list = driver_chom.find_elements(By.XPATH, '//div[@id="js_songlist"]/div')
    print(music_list)
    detail_urls = []
    for entry in music_list:
        # find_elements returns [] instead of raising, so an entry without
        # a link is skipped rather than aborting the whole run.
        links = entry.find_elements(By.XPATH, 'div[3]/span/a')
        href = links[0].get_attribute('href') if links else None
        if href:
            detail_urls.append(href)
finally:
    # The hrefs are plain strings, so the chart browser can be closed now.
    driver_chom.quit()

# Step 2: visit every detail page, store the metadata and the lyrics.
os.makedirs(lyrics_dir, exist_ok=True)
try:
    for detail_url in detail_urls:
        print(detail_url)
        driver_edge = webdriver.Edge()
        try:
            driver_edge.get(detail_url)
            # Fixed pause to give the page time to render.
            time.sleep(6)
            title = driver_edge.find_element(By.XPATH, "//div[@class='info_contain']/h2").text
            print(title)
            singer = driver_edge.find_element(By.XPATH, "//div[@class='info_singer']/a").text
            print(singer)
            zuoci = driver_edge.find_element(By.XPATH, "//div[@class='info_about']/p[1]/span").text
            print(zuoci)
            zuoqu = driver_edge.find_element(By.XPATH, "//div[@class='info_about']/p[2]/span").text
            print(zuoqu)
            album = driver_edge.find_element(By.XPATH, "//div[@class='info_about']/p[3]/span/a").text
            print(album)

            # Store the song's metadata in the database.
            print("将歌曲的信息写入到数据库中!")
            try:
                cur.execute(insert_sql, (title, singer, zuoci, zuoqu, album))
            except Exception as exc:
                # Undo the failed statement and say so; lyrics are still saved.
                conn.rollback()
                print(f"写入失败: {exc}")
            else:
                conn.commit()
                print("写入完成!")

            # Save the lyrics, one <p> element per line.
            lyric_lines = driver_edge.find_elements(By.XPATH, "/html/body/div[3]/div/div/div/p")
            file_name = f'{title}_{singer}.txt'.translate(unsafe_filename_chars)
            with open(os.path.join(lyrics_dir, file_name), 'w', encoding='utf-8') as file:
                file.writelines(line.text + '\n' for line in lyric_lines)
        except Exception as exc:
            # One bad page must not stop the crawl; report it and move on.
            print("*********************Error*********************")
            print(exc)
        finally:
            # Always close this song's browser so sessions do not pile up.
            driver_edge.quit()
finally:
    # Close database access.
    cur.close()
    conn.close()

结果

在这里插入图片描述
在这里插入图片描述

欢迎学习指正!!!!!

  • 6
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

辣子不辣,英语不难

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值