# Download the images embedded in WeChat articles
# downloadIMage.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re,os
import urllib,urllib2;
import time
import sys
# Python 2 idiom: reloading ``sys`` makes ``setdefaultencoding`` callable again
# so the implicit str <-> unicode conversion codec can be switched to UTF-8.
# NOTE(review): ``reload`` as a builtin and ``sys.setdefaultencoding`` are
# Python 2 only -- confirm the interpreter before porting this script.
reload(sys)
sys.setdefaultencoding('utf8')
def getHtml(url):
    """Fetch *url* and return the raw response body.

    A browser-like ``User-Agent`` header is sent so that the server does not
    treat the request as coming from a bot.

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as read from the connection (not decoded).

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    page = urllib2.urlopen(request)
    try:
        return page.read()
    finally:
        # Release the connection even when read() raises; the response
        # object is not closed automatically.
        page.close()
def getImg(html, savePath):
    """Download every image referenced by a ``data-src`` attribute in *html*.

    Images are written into *savePath* as ``0.jpg``, ``1.jpg``, ... in the
    order their attributes occur in the page. The directory is created when
    it does not exist yet. A download that fails is reported and skipped,
    but still uses up its index; an empty attribute is reported and does not.

    Args:
        html: Page source to scan for ``data-src="..."`` attributes.
        savePath: Directory that receives the downloaded files.

    Returns:
        None. Progress is printed as ``[+]`` (saved) / ``[-]`` (failed) lines.
    """
    # Example of an address captured by the pattern:
    # http://mmbiz.qpic.cn/mmbiz_jpg/wlJkphkR2NMibwTo1cqHwdhLTMYmbV0IOw5vCaJuTsbvTdukCQwUicPClXRibcnY8RCsszAfBYlrJnfz8icUIBWWGw/640?wx_fmt=jpeg
    imgUrls = re.findall(r'data-src="(.*?)"', html)
    if not imgUrls:
        return

    # urlretrieve writes straight to the target file and does not create
    # missing directories, so make sure the destination exists first.
    if not os.path.isdir(savePath):
        try:
            os.makedirs(savePath)
        except OSError as err:
            print("[-] savePath =%s (%s)" % (savePath, err))
            return

    index = 0
    for imgurl in imgUrls:
        if not imgurl:
            # Nothing to fetch: report it like a failed download, but do not
            # spend a file index or a delay on it.
            print("[-] imgurl =%s" % imgurl)
            continue
        target = os.path.join(savePath, '%s.jpg' % index)
        try:
            urllib.urlretrieve(imgurl, target)
            print("[+] imgurl =%s" % imgurl)
        except Exception:
            # Best effort: one broken image must not stop the remaining
            # downloads. KeyboardInterrupt/SystemExit still propagate.
            print("[-] imgurl =%s" % imgurl)
        index += 1
        # One-second pause between requests (presumably to go easy on the
        # image server).
        time.sleep(1)
def _safeDirName(name):
    """Return *name* reduced to something usable as a single directory name."""
    # Link text comes from a downloaded page: neutralise path separators and
    # the characters Windows rejects, and never yield '', '.' or '..'.
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', name).strip(' .')
    return cleaned or '_'


def bookUrl(html, saveRoot=r'C:\Users\pradmin\Desktop\downloadImage\images',
            limit=6):
    """Follow the ``target="_blank"`` links in *html* and save their images.

    Each matching ``<a href="..." target="_blank">text</a>`` yields a URL and
    a link text. The linked page is fetched with ``getHtml`` and handed to
    ``getImg`` together with a directory under *saveRoot* named after the
    link text.

    Args:
        html: Source of the index page containing the links.
        saveRoot: Directory under which one sub-directory per link is used.
        limit: Maximum number of links to process, counted from the top of
            the page; ``None`` processes all of them.

    Returns:
        None.
    """
    links = re.findall(r'<a href=\"(.*?)\" target=\"_blank\">(.*?)<\/a>', html)
    for url, bookName in links[:limit]:
        # Byte strings are decoded as UTF-8; text that is already unicode is
        # used as-is.
        if isinstance(bookName, bytes):
            bookName = bookName.decode("utf-8")
        savePath = os.path.join(saveRoot, _safeDirName(bookName))
        print("[+] url =%s" % url)
        getImg(getHtml(url), savePath)
# Index article: the links found in this page are the ones bookUrl follows.
originUrl = "http://mp.weixin.qq.com/s?__biz=MzA4NjQzNzY4Mw==&mid=2454531002&idx=4&sn=67826657f4486bfa0cb8f195262a86f9&chksm=887131e6bf06b8f09b2ec821f49c71c64536cf585d9f17664709fcfc533d39c976c30da91a8d&mpshare=1&scene=1&srcid=1215qBhkFwNhLrfnZlSMmZSj#rd"

# Only start downloading when run as a script, so that importing this module
# has no network or filesystem side effects.
if __name__ == "__main__":
    html = getHtml(originUrl)
    bookUrl(html)