方法1:urllib + 正则表达式
1 import re
2 import urllib
3 import urllib.request
4
def gethtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The undecoded response body as ``bytes``.

    Any exception raised by ``urlopen`` or ``read`` propagates to the caller.
    """
    # The context manager closes the response as soon as the body has been
    # read, instead of leaving it open until garbage collection.
    with urllib.request.urlopen(url) as page:
        return page.read()
9
def getimage(html, x):
    """Download every ``data-src`` target found in *html*.

    Each match is saved as ``<n>.jpg`` using a relative file name (so the
    files land in the current working directory), numbered consecutively
    starting at *x*.

    Args:
        html: Page source, either UTF-8 encoded bytes or an already-decoded
            ``str``.
        x: Number used for the first saved file.

    Returns:
        The next unused number, i.e. *x* plus the number of files saved.
    """
    # Examples of the image URLs this is meant to pick up:
    # https://mmbiz.qpic.cn/mmbiz_jpg/ib55rg6wzuc3b16kiy3uu53nkcttdic8uea4wwbpahj8lpibvankps2fztyjrv7w7dbeenrhfvpuuyrenaxsldgja/640?wx_fmt=jpeg
    # https://mmbiz.qpic.cn/mmbiz_jpg/ib55rg6wzuc3b16kiy3uu53nkcttdic8uehqoci7r86nehl2neforaqvctiaeaiuwjtwpknxnnxipuuuqnujefkyw/640?wx_fmt=jpeg
    # The regex is the key part: it lazily captures whatever sits between
    # the double quotes following each ``data-src=``.
    if isinstance(html, (bytes, bytearray)):
        text = html.decode('utf-8')
    else:
        text = html
    imlist = re.findall(r'data-src="(.*?)"', text)

    print(imlist)
    for i in imlist:
        print(i)
        print(x)
        urllib.request.urlretrieve(i, '%s.jpg' % x)
        x += 1
    return x
# Method 1 driver: fetch one article and save every image it references.
url = 'https://mp.weixin.qq.com/s/mvdcn0o3093olihmykqbia'
x = 1  # number given to the first saved image
html = gethtml(url)
x = getimage(html, x)
print('下载完成')
# The downloaded files end up in the same directory as this .py file.
方法2:使用 BeautifulSoup,避免手写正则表达式(因为不会)
import urllib.request

import requests
from bs4 import BeautifulSoup

# Download the article and parse it with the "html.parser" backend.
url = "https://mp.weixin.qq.com/s/cm3bua0um1jbznr2de7twg"
r = requests.get(url)
demo = r.text
soup = BeautifulSoup(demo, "html.parser")

# Collect the data-src value of every <img> tag. Tag.get() returns None when
# the attribute is missing, so those tags are skipped.
piclist = []
for link in soup.find_all('img'):
    src = link.get('data-src')
    if src is not None:
        piclist.append(src)

x = 0
for http in piclist:
    print(http)
    # f:\桌面\pa is the save directory; the folder has to be created
    # beforehand.
    filesavepath = r'f:\桌面\pa\%s.jpg' % x
    urllib.request.urlretrieve(http, filesavepath)
    x += 1
    # x was just incremented, so this reports a 1-based running count.
    print('正在保存第{:.0f}张图片'.format(x))
print('下载完成')
希望与广大网友互动!
点此进行留言吧!