# 用正则层层爬取图片
from urllib import request
import re
# Listing-page URL template; getPage() fills ``{}`` with the page number.
base_url = 'http://www.mmonly.cc/wmtp/fjtp/list_21_{}.html'
def download(pic_url):
    """Download one image into the local ``images/`` directory.

    The file is stored under the last ``/``-separated segment of
    ``pic_url``; an existing file with the same name is overwritten.

    Args:
        pic_url: Absolute URL of the image to fetch.
    """
    import os  # local import so this function does not rely on file-level edits

    print('downloading...%s' % pic_url)
    fname = pic_url.split('/')[-1]
    # Create the target directory on first use; otherwise urlretrieve fails
    # with FileNotFoundError when the folder was not prepared by hand.
    os.makedirs('images', exist_ok=True)
    request.urlretrieve(pic_url, 'images/' + fname)
def _fetch_html(url):
    """Return the body of ``url`` decoded as gb2312, or None if the fetch fails."""
    try:
        # The context manager closes the connection even when read() raises.
        with request.urlopen(url) as response:
            return response.read().decode('gb2312', 'ignore')
    except OSError as exc:  # covers URLError/HTTPError and socket-level errors
        print('failed to fetch %s: %s' % (url, exc))
        return None


def getPage():
    """Crawl listing pages 1-72 and download every image found in each album.

    For each listing page built from ``base_url``, the album links are
    extracted with a regular expression, each album page is fetched, and
    every matching ``<img>`` source is handed to ``download``. A page or
    image that cannot be fetched is reported and skipped so that one
    network error does not abort the whole crawl.
    """
    # Compile both patterns once instead of on every loop iteration.
    url_pat = re.compile(r'<div class="btns" > <a class="img_album_btn" href="http://www.mmonly.cc/wmtp/fjtp/(.*?)"', re.S)
    img_pat = re.compile(r'<img alt=".*?" src="(.*?)"')

    for i in range(1, 73):
        list_html = _fetch_html(base_url.format(i))
        if list_html is None:
            continue
        for url in url_pat.findall(list_html):
            # The pattern captures only the part after the common prefix.
            new_url = 'http://www.mmonly.cc/wmtp/fjtp/' + url
            album_html = _fetch_html(new_url)
            if album_html is None:
                continue
            for pic_url in img_pat.findall(album_html):
                try:
                    download(pic_url)
                except FileNotFoundError:
                    # A missing output directory is a setup problem that
                    # would hit every image; do not hide it.
                    raise
                except OSError as exc:
                    print('failed to download %s: %s' % (pic_url, exc))
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == '__main__':
    getPage()
# 爬取部分结果如下(记得在同级目录上先建立一个images文件夹)
C:\Users\cz\AppData\Local\Programs\Python\Python35\python.exe E:/Python/python高阶/43.py
downloading...http://t1.mmonly.cc/uploads/tu/201703/54/131.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/54/151.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/54/141.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/54/121.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/54/111.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201608/154/oamsep4zucp.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201608/155/dj1ju5h0ihi.jpg
downloading...http://t1.mmonly.cc/uploads/allimg/20150727/pnh1hrge3tn.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/37/151.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/37/141.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/37/131.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/37/121.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/37/13b1OOOPICe1.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/30/151.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/30/141.jpg
downloading...http://t1.mmonly.cc/uploads/tu/201703/30/131.jpg
Process finished with exit code 1
兄弟连学python
Python学习交流、资源共享群:563626388 QQ