图片处理 - 使用BeautifulSoup模块
标签(空格分隔): python
使用BeautifulSoup模块
使用BeautifulSoup对HTML内容解析之后,HTML内容就变成了结构化数据,可以轻易对其中的DOM元素进行操作,比如获取数据,修改,删除等。
from bs4 import BeautifulSoup
import urllib,urllib2
def getContext(url):
    '''
    Fetch a page and return its raw body.

    url: address passed to urllib2.urlopen.
    Returns whatever read() yields for the opened response.
    '''
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Close explicitly so the connection is released even if read() fails.
        response.close()
def getImages(html):
    '''
    Collect the src URLs of all <img class="BDE_Image"> tags in a page.

    html: HTML markup to parse.
    Prints each URL and returns the URLs as a list. <img> tags that have
    no (or an empty) src attribute are skipped.
    '''
    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed, so results can differ between machines.
    soup = BeautifulSoup(html, 'html.parser')
    # find_all is the bs4 spelling; findAll is only kept as a legacy alias.
    allPic = soup.find_all('img', class_="BDE_Image")
    # .get() avoids a KeyError on an <img> without a src attribute.
    srcList = [src for src in (img.get('src') for img in allPic) if src]
    for src in srcList:
        # Single-argument call form prints the same text as `print src`.
        print(src)
    # return a list
    return srcList
def downLoadPicture(urlList):
    '''
    Download every URL in urlList to a numbered file.

    Files are written as 1.jpg, 2.jpg, ... (relative paths) following the
    order of urlList; the .jpg extension is used whatever the real image
    type is. Returns None.
    '''
    # enumerate(..., 1) replaces the hand-maintained counter.
    for index, item in enumerate(urlList, 1):
        urllib.urlretrieve(item, '%s.jpg' % index)
# Page whose BDE_Image pictures are downloaded when run as a script.
url = 'http://tieba.baidu.com/p/3932177087'

if __name__ == '__main__':
    # Pipeline: fetch the page, extract the image URLs, download them.
    html = getContext(url)
    urls = getImages(html)
    downLoadPicture(urls)