准备工具:
Python 3.4.3
图片网站
import gzip
import re
import time
from urllib.parse import urljoin
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup


def unzip(data, charset='utf8'):
    """Decompress gzip-encoded bytes and decode them to text.

    Args:
        data: Raw gzip-compressed bytes.
        charset: Text encoding used to decode the decompressed bytes.

    Returns:
        The decoded string.
    """
    return gzip.decompress(data).decode(charset)


def getHtml(url, charset='utf8'):
    """Fetch a page and return its body as text.

    The response body is gunzipped first when the server reports
    ``Content-Encoding: gzip``.

    Args:
        url: Address of the page to fetch.
        charset: Text encoding used to decode the response body.

    Returns:
        The decoded page source.
    """
    print(url)
    # The context manager closes the connection even if read/decode fails.
    with urlopen(url) as resp:
        encoding = resp.info().get('Content-Encoding')
        body = resp.read()
    if encoding == 'gzip':
        return unzip(body, charset)
    return body.decode(charset)


def getImg(url):
    """Download every ``<img>`` on a page whose ``src`` contains ".jpg".

    Each image is saved in the current working directory under a name
    derived from the current timestamp. A download that fails is reported
    and skipped so that the remaining images are still fetched.

    Args:
        url: Address of the page to scan for images.
    """
    html = getHtml(url)
    soup = BeautifulSoup(html, 'html.parser')
    imgs = soup.find_all(name="img", attrs={"src": re.compile(r"\.jpg")})
    for img in imgs:
        # Resolve relative and protocol-relative sources against the page
        # URL; absolute URLs pass through urljoin unchanged.
        imgUrl = urljoin(url, img['src'])
        print(imgUrl)
        try:
            urlretrieve(imgUrl, '%s.jpg' % time.time())
        except (OSError, ValueError) as err:
            # URLError/HTTPError derive from OSError; ValueError covers
            # malformed URLs. Keep going with the remaining images.
            print('failed to download %s: %s' % (imgUrl, err))


def main():
    """Scrape the images from the site's front page."""
    getImg('http://www.mmjpg.com/')


if __name__ == '__main__':
    main()
贴个执行结果图:(逃ing...)
转载请注明来源【IT黑名单】