import urllib.request
import re
def handle_url(url, page):
    """Build the HTTP request for one listing page.

    Args:
        url: Base URL; the page number is appended to it verbatim.
        page: Page number (anything ``str()`` can render).

    Returns:
        A ``urllib.request.Request`` for ``url + str(page)`` carrying a
        browser-like ``User-Agent`` header.
    """
    url = url + str(page)
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/65.0.3325.181 Safari/537.36"
        ),
    }
    req = urllib.request.Request(url=url, headers=headers)
    return req
def handle_pages(req):
    """Download every thumbnail image found on one listing page.

    The page behind *req* is fetched and decoded as UTF-8, the ``src`` of the
    ``<img>`` inside each ``<div class="thumb">`` is extracted, and each image
    is saved as ``./images/<n>.jpg`` with ``n`` counting from 1.

    Args:
        req: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        None.
    """
    import os

    # Close the response deterministically once the body has been read.
    with urllib.request.urlopen(req) as res:
        html = res.read().decode('utf8')
    # Match the <div> that wraps each picture and capture the image src.
    pat = re.compile(
        r'<div class="thumb">.*?<img src="(.*?)" alt=.*?>.*?</div>', re.S
    )
    srcs = pat.findall(html)
    if not srcs:
        return
    # urlretrieve does not create missing directories.
    os.makedirs("./images", exist_ok=True)
    # NOTE(review): numbering restarts at 1 on every call, so files saved for
    # one page are overwritten by the next call -- confirm this is intended.
    for num, src in enumerate(srcs, 1):
        # Prepend "http:" (presumably the page uses protocol-relative srcs).
        url = "http:" + src
        urllib.request.urlretrieve(url, "./images/" + str(num) + ".jpg")
def main():
    """Prompt for a page range and download the images of every page in it.

    Reads the first and last page numbers from standard input, then calls
    ``handle_pages(handle_url(url, page))`` for each page from the first to
    the last, inclusive.

    Raises:
        ValueError: If either answer cannot be converted with ``int()``.
    """
    url = "https://www.qiushibaike.com/pic/page/"
    start_page = input("请输入起始页:")
    end_page = input("请输入结束页:")
    print("开始下载")
    # +1 because the end page entered by the user is inclusive.
    for page in range(int(start_page), int(end_page) + 1):
        handle_pages(handle_url(url, page))
    print("下载完毕!")
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
import re
def handle_url(url, page):
    """Build the HTTP request for one listing page.

    Args:
        url: Base URL; the page number is appended to it verbatim.
        page: Page number (anything ``str()`` can render).

    Returns:
        A ``urllib.request.Request`` for ``url + str(page)`` carrying a
        browser-like ``User-Agent`` header.
    """
    url = url + str(page)
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/65.0.3325.181 Safari/537.36"
        ),
    }
    req = urllib.request.Request(url=url, headers=headers)
    return req
def handle_pages(req):
    """Download every thumbnail image found on one listing page.

    The page behind *req* is fetched and decoded as UTF-8, the ``src`` of the
    ``<img>`` inside each ``<div class="thumb">`` is extracted, and each image
    is saved as ``./images/<n>.jpg`` with ``n`` counting from 1.

    Args:
        req: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        None.
    """
    import os

    # Close the response deterministically once the body has been read.
    with urllib.request.urlopen(req) as res:
        html = res.read().decode('utf8')
    # Match the <div> that wraps each picture and capture the image src.
    pat = re.compile(
        r'<div class="thumb">.*?<img src="(.*?)" alt=.*?>.*?</div>', re.S
    )
    srcs = pat.findall(html)
    if not srcs:
        return
    # urlretrieve does not create missing directories.
    os.makedirs("./images", exist_ok=True)
    # NOTE(review): numbering restarts at 1 on every call, so files saved for
    # one page are overwritten by the next call -- confirm this is intended.
    for num, src in enumerate(srcs, 1):
        # Prepend "http:" (presumably the page uses protocol-relative srcs).
        url = "http:" + src
        urllib.request.urlretrieve(url, "./images/" + str(num) + ".jpg")
def main():
    """Prompt for a page range and download the images of every page in it.

    Reads the first and last page numbers from standard input, then calls
    ``handle_pages(handle_url(url, page))`` for each page from the first to
    the last, inclusive.

    Raises:
        ValueError: If either answer cannot be converted with ``int()``.
    """
    url = "https://www.qiushibaike.com/pic/page/"
    start_page = input("请输入起始页:")
    end_page = input("请输入结束页:")
    print("开始下载")
    # +1 because the end page entered by the user is inclusive.
    for page in range(int(start_page), int(end_page) + 1):
        handle_pages(handle_url(url, page))
    print("下载完毕!")
# NOTE(review): the whole script appears twice in this file, so this second
# guard runs main() again after the earlier one -- confirm whether the
# duplicate copy should be removed.
if __name__ == "__main__":
    main()