# _*_ coding:utf-8 _*_
import urllib
from bs4 import BeautifulSoup
def get_content(url):
    """Fetch the resource at ``url`` and return its raw body.

    Args:
        url: Address handed to ``urllib.urlopen``.

    Returns:
        The full result of calling ``read()`` on the opened response.
    """
    html = urllib.urlopen(url)
    try:
        # Read inside try/finally so the response is closed even when
        # read() raises; otherwise a failed download leaks the handle.
        return html.read()
    finally:
        html.close()
def get_images(info):
    """Download every ``BDE_Image`` picture referenced in an HTML document.

    Every ``<img>`` tag whose class is ``BDE_Image`` has its ``src`` fetched
    with ``urllib.urlretrieve`` and stored under a relative file name
    ``1.jpg``, ``2.jpg``, ... following the order in which ``find_all``
    returns the tags. The list of matched tags is printed once all downloads
    have finished.

    Sample image tag noted by the original author:
        <img style="-webkit-user-select: none;" src="https://imgsa.baidu.com/forum/w%3D580/sign=ab8a793e69d9f2d3201124e799ed8a53/4d16738da97739126365c3bef3198618377ae240.jpg"
        width="580" height="435">

    Args:
        info: HTML markup to search (e.g. the value returned by
            ``get_content``).

    Returns:
        None.

    Raises:
        KeyError: If a matched tag has no ``src`` attribute.
    """
    # Name the parser explicitly: without it bs4 warns and picks whichever
    # parser happens to be installed, so results can vary between machines.
    soup = BeautifulSoup(info, 'html.parser')
    # Find all <img> tags carrying class="BDE_Image".
    all_img = soup.find_all('img', class_="BDE_Image")
    # Number the saved files starting from 1.
    for index, img in enumerate(all_img, 1):
        image_name = '%s.jpg' % index
        urllib.urlretrieve(img['src'], image_name)
    print(all_img)
# Script body: fetch the thread page, download its images, then print
# whatever get_images() returns.
info = get_content('http://tieba.baidu.com/p/2772656630')
print(get_images(info))
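# Source article title: "Python: scraping Tieba images with BeautifulSoup"
# Page footer from the source site: "Latest recommended article published 2024-07-23 09:30:00"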