import json
import os
import re
import requests
# Seconds to wait on each HTTP request; requests never times out by default.
REQUEST_TIMEOUT = 10

# Isolates the JSON object stored under "Main" in the page's inline SSR state
# script. Raw string with escaped metacharacters so '.', '{' and '}' can only
# match themselves.
SSR_STATE_PATTERN = re.compile(
    r'window\.__INITIAL_SSR_STATE__=\{"RedAppLayout":undefined,"Main":(.*?)\}</script>'
)

# Captures the 12-character image id that precedes the "?imageView2" query.
FILE_NAME_PATTERN = re.compile(r'-([0-9a-z]{12})\?imageView2')


def extract_column_notes(html):
    """Return the ``columnNotes`` list embedded in the explore page HTML.

    Args:
        html: Source text of the page.

    Returns:
        The value stored under ``"columnNotes"`` in the "Main" state object,
        or an empty list when the state script or that key is not present.

    Raises:
        ValueError: If the captured state text is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    match = SSR_STATE_PATTERN.search(html)
    if match is None:
        return []
    return json.loads(match.group(1)).get('columnNotes', [])


def image_file_name(url):
    """Return the 12-character image id found in *url*, or ``None`` if absent."""
    match = FILE_NAME_PATTERN.search(url)
    return match.group(1) if match else None


if __name__ == "__main__":
    # Present a browser User-Agent with every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    }

    # Fetch the page source; fail loudly on an HTTP error instead of trying
    # to parse an error page.
    response = requests.get(
        url='https://www.xiaohongshu.com/explore',
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    # Pull the note list out of the JSON embedded in the page.
    notes = extract_column_notes(response.text)
    if not notes:
        raise SystemExit('No notes found in the page; its layout may have changed.')

    # Create the download directory (no error if it already exists).
    os.makedirs('pic', exist_ok=True)

    # Download every cover image.
    for item in notes:
        # Request the 1080-wide rendition instead of the 540-wide one.
        url = item['cover']['url'].replace("w/540/format", "w/1080/format")

        file_name = image_file_name(url)
        if file_name is None:
            # A URL in an unexpected shape should not abort the remaining downloads.
            print('Skipping unrecognized image URL: ' + url)
            continue

        try:
            image = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
            # Avoid saving an HTTP error body as a .jpg file.
            image.raise_for_status()
        except requests.RequestException as err:
            print('Failed to download ' + url + ': ' + str(err))
            continue

        with open('pic/' + file_name + '.jpg', 'wb') as fp:
            fp.write(image.content)
        print(file_name + '下载完成')
# 12. 使用Python正则表达式解析图片并下载至本地
# 于 2022-04-12 12:38:00 首次发布