最近利用空闲时间研究了一下 Python，写了一个抓取网页图片的小脚本，大家可以看下。
#coding=utf-8
import urllib2,urllib
import re
import os
import sys
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to get it back. The call
# then switches the process-wide implicit str <-> unicode encoding to UTF-8.
# The order of these two statements matters.
reload(sys)
sys.setdefaultencoding('utf8')
# Create the folder that the downloaded images are stored in.
# os.makedirs is used instead of os.mkdir so that a missing parent folder
# ('D:\image') is created as well; os.mkdir raises OSError in that case.
if not os.path.exists('D:\\image\\1'):
    print(1)  # printed only when the folder has to be created
    os.makedirs('D:\\image\\1')
# Fetch the source code of a web page.
def getHtml(url):
    """Download ``url`` and return the response body.

    The request is sent with a browser-like ``User-Agent`` header. The body
    is also echoed to stdout before it is returned.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as returned by ``read()`` (not decoded).

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails (``HTTPError`` is a subclass).
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    page = urllib2.urlopen(request)
    try:
        html = page.read()
    finally:
        # Always release the underlying connection, even if read() fails;
        # the original code never closed the response object.
        page.close()
    print(html)
    return html
#从源代码中通过正则表达式获取图片的路径并下载
def getImg(html):
# relink = 'src="(http.*.jpg)" '
reg = r'src="(https://img.*?\.jpg)"'
imgre = re.compile(reg)
print reg
imglist = re.findall(reg,html)
print imglist
x = 0
for imgurl in imglist:
print imgurl
urllib.urlretrieve(i