#coding:utf8
import urllib
import re
def getHtml(url):
    """Download *url* and return the response body.

    Args:
        url: Address passed unchanged to ``urllib.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields (the whole body).
    """
    response = urllib.urlopen(url)
    # The Python 2 response object is not a context manager, so release the
    # connection explicitly even when read() raises.
    try:
        return response.read()
    finally:
        response.close()
def getImg(html):
    """Find every imgsrc.baidu.com JPEG URL in *html* and save each one.

    A matching URL looks like
    ``http://imgsrc.baidu.com/forum/w%3D580/sign=<hash>/<name>.jpg``.

    Args:
        html: Page source to scan.

    Each URL found is handed to ``saveImg`` together with its 1-based
    position in match order. Nothing is returned.
    """
    # Quotes, whitespace and angle brackets cannot be part of the URL, so
    # excluding them keeps one match from running across several URLs that
    # share a line (an unrestricted greedy ".*" would merge them). The dots
    # of the host name are escaped so they only match literal dots.
    pat = r'http://imgsrc\.baidu\.com/[^\s"\'<>]*sign=[^\s"\'<>]*\.jpg'
    result = re.findall(pat, html)

    # Preview at most the first two hits; slicing avoids an IndexError when
    # the page contains fewer than two images.
    for url in result[:2]:
        print(url)

    for count, url in enumerate(result, 1):
        saveImg(url, count)
def saveImg(imgUrl, count, basePath=r'D://python/img/'):
    """Download the JPEG at *imgUrl* and store it under *basePath*.

    The local file is named after the last path segment of the URL.

    Args:
        imgUrl: ``http://`` URL whose final path segment ends in ``.jpg``.
        count: Ordinal of the image; used only in the progress message.
        basePath: Directory prefix the file name is appended to. It is
            concatenated as-is, so it must end with a path separator.

    Raises:
        IndexError: If no ``.jpg`` file name can be extracted from *imgUrl*.
            This is checked before any network or file access happens.
    """
    pat = r'^http://.*/([^/]*\.jpg)$'
    name = re.findall(pat, imgUrl)
    if not name:
        # Same exception type as indexing the empty list, but with a message
        # that says which URL was rejected.
        raise IndexError('no .jpg file name found in URL: %r' % (imgUrl,))
    path = basePath + name[0]
    print(path)

    response = urllib.urlopen(imgUrl)
    try:
        imgContent = response.read()
    finally:
        response.close()

    # Binary mode: a text-mode handle rewrites newline bytes on Windows and
    # would corrupt the image data.
    with open(path, 'wb') as imgFile:
        imgFile.write(imgContent)
    print('保存第%s张图片成功' % (count))
# Script body: fetch one thread page, print a separator line, then save every
# image found in it. The URL is bound to ``pageUrl`` rather than ``str`` so the
# builtin ``str`` is not shadowed for the rest of the module.
pageUrl = r'http://tieba.baidu.com/p/1782042697?see_lz=1'
html = getHtml(pageUrl)
print('#' * 20)
getImg(html)
python抓取百度贴吧高清图片
最新推荐文章于 2024-10-17 11:21:39 发布