# -*- coding: utf-8 -*-
# Reference: http://www.cnblogs.com/fnng/p/3576154.html
"""Tiny crawler: fetch a web page and download every ``.png`` it references.

Runs on both Python 2 and Python 3.
"""
import os
import re
import urllib  # retained from the original script; the helpers below are imported by name
from contextlib import closing

try:  # Python 3 layout
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2 layout
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin

# ``[^"]`` keeps a match inside a single attribute value.  The earlier
# ``.+?`` could run past the closing quote of a non-png ``src`` and swallow
# markup up to the next ``.png"``.
_PNG_SRC_RE = re.compile(r'src="([^"]+?\.png)"')


def getHtml(url):
    """Return the raw body of *url* exactly as read from the response.

    The response object is closed before returning, including when the
    read raises.
    """
    # ``closing`` is used because the Python 2 response object is not a
    # context manager.
    with closing(urlopen(url)) as page:
        return page.read()


def getImg(html, base_url='http://www.docshouse.cn/', dest_dir='.'):
    """Download every ``.png`` referenced by a ``src="..."`` attribute.

    Args:
        html: Page markup, either text or UTF-8 encoded bytes.
        base_url: URL that relative ``src`` values are resolved against.
            Absolute ``src`` values are used as-is.
        dest_dir: Directory that receives the files, which are named
            ``0.png``, ``1.png``, ... in order of appearance.

    Returns:
        The list of ``src`` values that were matched, unresolved.

    Any error raised by the download propagates to the caller.
    """
    # On Python 3 the body read from the network is ``bytes``, which a text
    # pattern cannot search.  On Python 2 ``bytes is str``, so this branch is
    # skipped and the input is used unchanged.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    imglist = _PNG_SRC_RE.findall(html)
    for index, imgurl in enumerate(imglist):
        target = os.path.join(dest_dir, '%s.png' % index)
        # urljoin copes with relative, root-relative and absolute sources;
        # plain concatenation only worked for the relative case.
        urlretrieve(urljoin(base_url, imgurl), target)
    return imglist


if __name__ == "__main__":
    # Guarded so that importing this module does not trigger a crawl.
    html = getHtml("http://www.docshouse.cn/")
    print(getImg(html))
python 爬虫
最新推荐文章于 2024-04-01 13:30:49 发布