# 爬取网站图片(网站地址隐藏)
import logging
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def _fetch_or_none(target, timeout):
    """Return the HTTP response for *target*, or ``None`` if it is unusable.

    Network failures and HTTP error statuses (4xx/5xx) are logged and reported
    as ``None`` so that one bad link does not abort the whole crawl.
    """
    try:
        response = requests.get(target, timeout=timeout)
    except requests.RequestException as exc:
        logging.warning("request failed for %s: %s", target, exc)
        return None
    if not response.ok:
        logging.warning("skipping %s: HTTP %s", target, response.status_code)
        return None
    return response


def spider(url, save_dir=r"D:\av", timeout=10):
    """Download every image found in the threads linked from one listing page.

    The listing page at *url* is scanned for ``<td class="icon">`` cells, and
    the first link inside each cell is taken as a thread address.  Each thread
    page is then searched for ``img`` tags with the CSS selector used below,
    and every image is saved into *save_dir* as ``<number>.jpg``.

    Existing files are never overwritten: numbering continues at the first
    unused number, so repeated calls (one per listing page) accumulate images
    instead of replacing each other's output.

    Args:
        url: Address of the listing page.
        save_dir: Directory the images are written to; created when missing.
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    listing = _fetch_or_none(url, timeout)
    if listing is None:
        return
    listing_soup = BeautifulSoup(listing.text, "html.parser")

    # Collect the address of every thread on the listing page.
    thread_urls = []
    for cell in listing_soup.find_all("td", class_="icon"):
        link = cell.a
        if link is None or not link.get("href"):
            continue  # icon cell without a usable link
        # urljoin leaves absolute hrefs untouched and resolves relative ones
        # against the listing page address.
        thread_urls.append(urljoin(url, link["href"]))

    # Collect the address of every original image inside each thread.
    image_urls = []
    for thread_url in thread_urls:
        thread_page = _fetch_or_none(thread_url, timeout)
        if thread_page is None:
            continue
        # NOTE: the "lxml" parser requires the third-party lxml package.
        thread_soup = BeautifulSoup(thread_page.text, "lxml")
        # CSS selector
        images = thread_soup.select(
            'div[class="editor_content"] > a > span > span > img '
        )
        for image in images:
            src = image.get("src")
            if src:
                image_urls.append(urljoin(thread_url, src))

    if not image_urls:
        return

    # Save the images to disk.
    os.makedirs(save_dir, exist_ok=True)
    number = 0
    for image_url in image_urls:
        picture = _fetch_or_none(image_url, timeout)
        if picture is None:
            continue
        # Restarting at 0 on every call would make each listing page clobber
        # the files written for the previous one, so skip numbers in use.
        filename = os.path.join(save_dir, "%d.jpg" % number)
        while os.path.exists(filename):
            number += 1
            filename = os.path.join(save_dir, "%d.jpg" % number)
        with open(filename, "wb") as out:
            out.write(picture.content)
        number += 1
# Crawl listing pages 0 through 19, one spider() call per page.
page_template = "http://www.XXXXXXXX.com/thread/81/%d?type=11"
for i in range(0, 20):
    url = page_template % i
    spider(url)