# Recommended site for learning web scraping: http://cuiqingcai.com/1052.html
#coding=utf-8
#下载百度贴吧的图片
import urllib
import urllib2
import re
import os
class worn(object):
    """Download the ``.jpg`` images referenced by a Baidu Tieba thread page.

    Targets Python 2: it relies on ``urllib2`` and ``urllib.urlretrieve``.
    """

    # Thread fetched when getPage() is called without an explicit URL.
    DEFAULT_URL = 'http://tieba.baidu.com/p/4906753776'

    # Captures the value of a src="..." attribute that ends in ".jpg".
    # [^"]* keeps the capture inside a single attribute value; the earlier
    # ".*?" could run past the closing quote of a non-jpg src and swallow
    # markup up to a later ".jpg", yielding a bogus URL.
    IMG_PATTERN = re.compile(r'src="([^"]*\.jpg)".*?>')

    def getPage(self, url=None):
        """Fetch a page and return its raw body.

        :param url: address to fetch; defaults to ``DEFAULT_URL``.
        :returns: the response body exactly as read from the connection.
        :raises urllib2.URLError: if the request fails.
        """
        if url is None:
            url = self.DEFAULT_URL
        request = urllib2.Request(url)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()

    def findImgUrls(self, page):
        """Return every ``.jpg`` ``src`` value found in *page*, in order.

        :param page: HTML text to scan.
        :returns: list of the captured attribute values (possibly empty).
        """
        return self.IMG_PATTERN.findall(page)

    def getImg(self, page, limit=5):
        """Download the first *limit* ``.jpg`` images referenced in *page*.

        Files are written to the current working directory as ``0.jpg``,
        ``1.jpg``, ... in the order the images appear in the page.

        :param page: HTML text to scan for image URLs.
        :param limit: maximum number of images to download; ``None`` means
            no cap, and values below zero are treated as zero.
        :returns: ``None``.
        """
        urls = self.findImgUrls(page)
        if limit is not None:
            urls = urls[:max(limit, 0)]
        for index, imgurl in enumerate(urls):
            # Save each image locally under its sequence number.
            urllib.urlretrieve(imgurl, '%s.jpg' % index)
# Script driver: runs at module level, so it executes whenever this file is
# run or imported. Fetches the thread page, then downloads the .jpg images
# that the page references.
worn1 = worn()
page = worn1.getPage()
worn1.getImg(page)