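# Written for and tested with Python 3.6.5.
# Python 3 ships the built-in urllib package (this script uses urllib.request).
# In Python 2 the equivalent functionality lives in the built-in urllib2 module.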
#coding:utf-8
import urllib.request
import re
# Simple GIF crawler: reads a page URL from standard input, fetches it once
# per entry in `pages`, extracts every https ".gif" URL found in a src="..."
# attribute and saves each image under a sequentially numbered file name.

# Pattern capturing the URL of each https-hosted .gif inside src="...".
# It is passed to re.findall as a plain string (re caches the compiled form).
rex = r'src="(https://.*?\.gif)"'
# Page identifiers to crawl, kept as strings because they are concatenated
# into file labels and substituted into the URL template.
pages = ('1', '2')
# Running counter used to name the downloaded files (1.gif, 2.gif, ...).
x = 1
# Enter the URL to crawl. It may contain a "%s" placeholder that is replaced
# by the page number, e.g. "http://example.com/default_%s.html".
pageurl = input()
for page in pages:
    # str.replace is used instead of the % operator so that URLs containing
    # percent-escapes such as "%20" do not raise. A URL without "%s" is used
    # unchanged, which means the same page is fetched once per entry in pages.
    page_url = pageurl.replace('%s', page)
    # The context manager guarantees the HTTP connection is closed.
    with urllib.request.urlopen(page_url) as response:
        print(response)
        raw_html = response.read()
    # Decode once; undecodable bytes are dropped rather than raising.
    html = raw_html.decode('utf-8', 'ignore')
    print(html)
    lists = re.findall(rex, html)
    lensofpage = len(lists)
    print(lensofpage)
    picname = 'page' + page
    print(picname)
    for picurl in lists:
        # Destination path; the target directory must already exist.
        urllib.request.urlretrieve(picurl, r'C:\Users\hipeson\Desktop\pic\%s.gif' % x)
        print(page + picurl)
        x = x + 1
print('DownLoadPicOver')