from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import re
import os
from urllib.request import urlretrieve
# Index article whose body links (<a target="_blank">) to the picture articles.
url = "https://mp.weixin.qq.com/s?__biz=MzA3MjcxMDMyMg==&mid=2247525125&idx=5&sn=c0e6722238e3bbf63d1ac6dd5c769f6a&chksm=9f1802d8a86f8bce37c27629e6ac4a073f3c798712b52f3e1237e453645f0b6a7e44f91e6d17&scene=21#wechat_redirect"
# Browser-like User-Agent sent with every page request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
# Root folder; one sub-folder per article title is created beneath it.
OUTPUT_ROOT = 'd:/'
# Seconds to wait for a page before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Characters that cannot appear in a Windows file/folder name, plus control
# characters. Article titles are free text and routinely contain some of them.
_INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sanitize_title(title):
    """Return *title* made safe for use as a single folder name.

    Invalid path characters are replaced with ``_``; surrounding whitespace
    and trailing dots/spaces are removed. ``"untitled"`` is returned when
    nothing usable is left.
    """
    cleaned = _INVALID_NAME_CHARS.sub("_", title).strip().rstrip(". ")
    return cleaned or "untitled"


def fetch_soup(page_url) -> "BeautifulSoup":
    """Download *page_url* with the module ``headers`` and parse it as HTML.

    Raises:
        OSError: (``urllib.error.URLError`` and subclasses) on network failure.
    """
    request = Request(page_url, headers=headers)
    # The response is fully consumed by the parser before the block exits,
    # so closing it here is safe and avoids leaking the connection.
    with urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return BeautifulSoup(response, 'html.parser')


def download_article_images(article_url):
    """Save every in-article image of *article_url* into its own folder.

    The folder is ``OUTPUT_ROOT/<og:title>``; images are written as
    ``1.jpeg``, ``2.jpeg``, ... following their position in the page.

    Returns:
        The number of images actually saved (0 if the page has no og:title).
    """
    soup = fetch_soup(article_url)
    title_tag = soup.find('meta', {"property": "og:title"})
    title = title_tag.get("content") if title_tag is not None else None
    if not title:
        print("no og:title, skipping:", article_url)
        return 0

    target_dir = os.path.join(os.path.abspath(OUTPUT_ROOT), sanitize_title(title))
    print(target_dir)
    # exist_ok lets the script be re-run without failing on existing folders.
    os.makedirs(target_dir, exist_ok=True)

    images = soup.find_all("img", {"class": "rich_pages js_insertlocalimg"})
    saved = 0
    for index, image in enumerate(images, start=1):
        source = image.get("data-src")
        if not source:
            # Image tag without a lazy-load source: nothing to download.
            continue
        print(source)
        try:
            urlretrieve(source, os.path.join(target_dir, str(index) + '.jpeg'))
        except OSError as err:
            # One broken image must not abort the rest of the article.
            print("failed to download", source, err)
            continue
        saved += 1
    return saved


def main():
    """Walk the links of the index article and download each one's images."""
    index_soup = fetch_soup(url)
    for link in index_soup.find_all('a', {"target": "_blank"}):
        article_url = link.get("href")
        # Skip anchors without a fetchable target (missing href, javascript:, ...).
        if not article_url or not article_url.startswith(("http://", "https://")):
            continue
        try:
            download_article_images(article_url)
        except OSError as err:
            # Network or filesystem failure for one article: report and move on.
            print("skipping", article_url, err)


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
# Tag: web crawler
# Page footer: latest recommended article published 2024-07-28 15:46:11