# Code:
# -*- coding: utf-8 -*-
import urllib
import re
def getHtml(url):
    """Read the HTML content of a web page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as ``read()`` hands it back (a byte
        string; on Python 2 that is a plain ``str``).

    Any error raised while opening or reading the URL propagates to the
    caller unchanged.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2;
    # resolving it here keeps the function usable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when read() fails.
        page.close()
def getImg(html):
    """Find the ``.jpg`` images in a page and download them into ``image/``.

    An image URL is the text between ``src="`` and the following
    ``" alt=``, provided it ends in ``.jpg``. Every matched URL is fetched
    and stored in the ``image`` folder (relative to the current working
    directory) under the last path segment of its URL.

    Args:
        html: Page markup, either text or the raw bytes returned by
            ``urlopen(...).read()``.

    Returns:
        The list of matched image URLs, in the order they appear in *html*.

    Errors raised while downloading or writing a file propagate unchanged.
    """
    import os
    try:
        from urllib.request import urlopen  # Python 3 location
    except ImportError:
        from urllib import urlopen  # Python 2 location

    # On Python 3 the page arrives as bytes, which a str pattern cannot
    # search. On Python 2 bytes is str, so this branch is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    imgre = re.compile(r'src="(.+?\.jpg)" alt=')  # image URL pattern
    imglist = imgre.findall(html)

    # Create the target folder on first use instead of failing on open().
    if imglist and not os.path.isdir('image'):
        os.makedirs('image')

    for imgurl in imglist:
        # Use the final path segment as the file name rather than a fixed
        # character offset, which only fits one particular URL prefix length.
        name = imgurl.rsplit('/', 1)[-1]
        response = urlopen(imgurl)
        try:
            content = response.read()
        finally:
            response.close()
        # Binary mode: image bytes must not go through newline translation.
        with open(os.path.join('image', name), 'wb') as out:
            out.write(content)
    return imglist
# Fetch the ivsky.com front page, download the .jpg images it references,
# and print the list of image URLs that were found.
html = getHtml("http://www.ivsky.com/")
# print(...) with a single argument gives the same output under the
# Python 2 print statement and the Python 3 print function.
print(getImg(html))
def getHtml(url):
    """Read the HTML content of a web page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as ``read()`` hands it back (a byte
        string; on Python 2 that is a plain ``str``).

    Any error raised while opening or reading the URL propagates to the
    caller unchanged.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2;
    # resolving it here keeps the function usable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when read() fails.
        page.close()
def getImg(html):
    """Find the ``.jpg`` images in a page and download them into ``image/``.

    An image URL is the text between ``src="`` and the following
    ``" alt=``, provided it ends in ``.jpg``. Every matched URL is fetched
    and stored in the ``image`` folder (relative to the current working
    directory) under the last path segment of its URL.

    Args:
        html: Page markup, either text or the raw bytes returned by
            ``urlopen(...).read()``.

    Returns:
        The list of matched image URLs, in the order they appear in *html*.

    Errors raised while downloading or writing a file propagate unchanged.
    """
    import os
    try:
        from urllib.request import urlopen  # Python 3 location
    except ImportError:
        from urllib import urlopen  # Python 2 location

    # On Python 3 the page arrives as bytes, which a str pattern cannot
    # search. On Python 2 bytes is str, so this branch is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    imgre = re.compile(r'src="(.+?\.jpg)" alt=')  # image URL pattern
    imglist = imgre.findall(html)

    # Create the target folder on first use instead of failing on open().
    if imglist and not os.path.isdir('image'):
        os.makedirs('image')

    for imgurl in imglist:
        # Use the final path segment as the file name rather than a fixed
        # character offset, which only fits one particular URL prefix length.
        name = imgurl.rsplit('/', 1)[-1]
        response = urlopen(imgurl)
        try:
            content = response.read()
        finally:
            response.close()
        # Binary mode: image bytes must not go through newline translation.
        with open(os.path.join('image', name), 'wb') as out:
            out.write(content)
    return imglist
# Fetch the ivsky.com front page, download the .jpg images it references,
# and print the list of image URLs that were found.
# NOTE(review): the same two statements already appear earlier in this file,
# so the page is fetched and every image downloaded a second time here.
# This looks like an accidental duplicate paste; confirm before removing.
html = getHtml("http://www.ivsky.com/")
# print(...) with a single argument gives the same output under the
# Python 2 print statement and the Python 3 print function.
print(getImg(html))