# 姓名: 晓晓
# 时间:2021/12/15 21:02
import requests
from lxml import etree
import os
# Directory (relative to the working directory) that receives the MP3 files.
SAVE_DIR = 'xiaofang'
# Search-result page whose audio previews are downloaded.
PAGE_URL = 'https://www.yespik.com/search-sound/77822.html'
# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'
}
# Seconds to wait for a server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 30
# Characters that are not allowed in file names on common file systems
# (plus control whitespace); each one is replaced by an underscore.
_UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})


def build_audio_url(src):
    """Return a downloadable URL for the ``src`` attribute of a <source> tag.

    Protocol-relative values (``//host/path``) get the ``http:`` scheme
    prepended; values that already carry a scheme are returned unchanged
    instead of being corrupted by a second prefix.
    """
    src = src.strip()
    if src.startswith('//'):
        return 'http:' + src
    return src


def safe_filename(title, fallback='audio'):
    """Turn a track title into a file name ending in ``.mp3``.

    Surrounding whitespace is removed and characters that are illegal in
    file names are replaced by ``_``. ``fallback`` is used when nothing
    is left of the title.
    """
    cleaned = title.strip().translate(_UNSAFE_CHARS)
    return (cleaned or fallback) + '.mp3'


def main():
    """Download every audio preview listed on PAGE_URL into SAVE_DIR."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(SAVE_DIR, exist_ok=True)

    page = requests.get(url=PAGE_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error rather than parsing an error page.
    page.raise_for_status()

    # Parse the listing; each matched <div> is expected to be one entry.
    tree = etree.HTML(page.text)
    entries = tree.xpath('/html/body/div[4]/div[1]/div')

    for index, entry in enumerate(entries, start=1):
        sources = entry.xpath('div/div[3]/audio/source/@src')
        if not sources:
            # Not every matched <div> need be an audio entry; skip those
            # without a source instead of crashing with IndexError.
            continue
        titles = entry.xpath('div/div[2]/a/text()')

        audio_url = build_audio_url(sources[0])
        print(audio_url)
        name = safe_filename(titles[0] if titles else '',
                             fallback='audio_{}'.format(index))
        print(name)

        try:
            audio = requests.get(url=audio_url, headers=HEADERS,
                                 timeout=REQUEST_TIMEOUT)
            # Never store an HTML error body under an .mp3 name.
            audio.raise_for_status()
        except requests.RequestException as err:
            # One broken item must not abort the remaining downloads.
            print(name, 'download failed:', err)
            continue

        # Path of the downloaded file inside the save directory.
        target_path = os.path.join(SAVE_DIR, name)
        with open(target_path, mode='wb') as fp:
            fp.write(audio.content)
        print(name, "下载完成!!!")


if __name__ == '__main__':
    main()
爬取声音
该博客展示了如何使用Python的requests和lxml库来抓取网页上的音频资源。作者首先创建了一个目录来保存下载的文件,然后通过发送HTTP请求获取HTML内容,解析HTML找到音频源URL,并下载保存为MP3文件。整个过程详细地说明了网络爬虫在获取多媒体内容方面的应用。
摘要由CSDN通过智能技术生成