示例代码如下:
import requests
import time
# Image URLs to download; the loop further down requests each one in turn.
# Replace these with the addresses of the images you actually want to fetch --
# locating the right URL on the web page is the key step.
lst_photo = [
    "https://i.gtimg.cn/qqlive/img/jpgcache/files/qqvideo/hori/x/x5kmpl76yuprscv.jpg",
    "https://img1.qunarzz.com/travel/d0/1412/8f/8f3c4fc8c986bab8cdb.jpg_r_1360x1360x95_49073a69.jpg",
    "https://img1.qunarzz.com/travel/d9/1412/4e/4ece2abadf170d03cdb.jpg_r_1360x1360x95_ed2997d3.jpg",
    "https://ts1.cn.mm.bing.net/th/id/R-C.7893a5a9d9cec3c0a233ee9e34d20c4b?rik=rrVZKXtythhjdw&riu=http%3a%2f%2fimg.coolban.com%2ftimg%2f130131%2f10022KR2-26.jpg&ehk=FkKfc1cB0rTlwSePlC%2bAhZb%2f900TZ9Hk0m7g8PYDwYc%3d&risl=&pid=ImgRaw&r=0",
    "https://ts1.cn.mm.bing.net/th/id/R-C.001dfb852c210e5137e7ab5405f83dcd?rik=9hCsqGqmfaIwHA&riu=http%3a%2f%2fimg.coolban.com%2ftimg%2f130131%2f10013V444-17.jpg&ehk=m7%2fOrcke%2bh1ZEQsQ7Jpvr3olGUiHxvNRwy0Lw6e6WxE%3d&risl=&pid=ImgRaw&r=0",
]
# Audio URLs to download. Replace these with the addresses of the audio files
# you want to fetch, taken from the web page that serves them.
# NOTE(review): each URL embeds what looks like a timestamp (e.g.
# 20240119101650) followed by a hash, so these links presumably expire --
# confirm and refresh them before running.
lst_music = [
    "https://m804.music.126.net/20240119101650/3571a785a90b30c456e40ba31141f402/jdyyaac/obj/w5rDlsOJwrLDjj7CmsOj/27224155691/e8f0/bbf1/e707/7e758c14ce621f3a2f68361407542ad2.m4a?authSecret=0000018d1f6ad08806500aaba3b4fdd5",
    "https://m801.music.126.net/20240119102304/f3eb51f0eb6bdf32e5d1b2a6d48b78e9/jdyyaac/520b/565a/5452/ea8ebf9b16a040da1ac7c8b9e3eb6c0a.m4a",
    "https://m801.music.126.net/20240119102605/ba3c4878283f61a0741cc564efe97c62/jdyyaac/obj/w5rDlsOJwrLDjj7CmsOj/19812569115/85e0/22c9/16e2/87f554ae6e347f29793a3c0075534bdf.m4a",
    "https://m701.music.126.net/20240119102630/69d397be92a0037fb1880ae006aeceb2/jdyyaac/obj/w5rDlsOJwrLDjj7CmsOj/5414820151/d672/34cf/e42e/63804096d0e493b6ff604f551cc983c3.m4a",
    "https://m10.music.126.net/20240119102651/651223069d84526fadc7a435c51cf3e5/yyaac/obj/wonDkMOGw6XDiTHCmMOi/3036964176/0ed2/24ee/d25e/e1f5fe4831493c9846a2e8c4e6061838.m4a",
]
# HTTP request headers sent with every download. Only User-Agent is defined;
# web servers use it to identify the client's system and to judge whether a
# crawler is making the request. Further headers (Referer, Accept, Cookie, ...)
# can be added as extra comma-separated key/value pairs.
# The value below is a placeholder: substitute your own browser's User-Agent.
h = {"User-Agent": "(找到自己的user-agent)"}
# Running counter used to name the saved files: images become 0.jpg, 1.jpg, ...
# and the audio files continue the same numbering with a .m4a extension.
num = 0


def _save_all(urls, suffix, start, timeout=10, pause=1):
    """Download every URL in *urls* and save each body as ``<counter><suffix>``.

    Args:
        urls: iterable of URLs to fetch with a GET request.
        suffix: file extension (including the dot) for the saved files.
        start: counter value used to name the first saved file.
        timeout: seconds to wait for the server before giving up on a URL.
        pause: seconds to sleep after each request.

    Returns:
        The next unused counter value, so numbering can continue across calls.
    """
    counter = start
    for url in urls:
        try:
            # Without a timeout a single stalled server would block forever.
            response = requests.get(url, headers=h, timeout=timeout)
        except requests.RequestException as err:
            # One unreachable URL must not abort the remaining downloads.
            print(f"request failed, skipping {url}: {err}")
        else:
            # Status 200 means the request succeeded; anything else is skipped.
            if response.status_code == 200:
                # Write the raw bytes of the image/audio to a local file.
                with open(f"{counter}{suffix}", "wb") as f:
                    f.write(response.content)
                # Advance only after a successful save so names stay contiguous.
                counter += 1
            else:
                print(f"unexpected status {response.status_code}, skipping {url}")
        # Pause between requests, including failed ones, to avoid hammering
        # the server.
        time.sleep(pause)
    return counter


num = _save_all(lst_photo, ".jpg", num)
num = _save_all(lst_music, ".m4a", num)
print("保存完毕")
关于如何找到自己的user-agent,如下图: