#小剪子vV 提醒:好好学习 天天向上
import os
import requests
from bs4 import BeautifulSoup
# if not os.path.exists('美女图片'):
# os.mkdir('美女图片')
#url = "https://pic.netbian.com/4kmeinv"
def craw_html(url, timeout=10):
    """Fetch a page and return its body as text decoded with GBK.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without a timeout a stalled connection would
            block the crawl indefinitely.

    Returns:
        The response body as a ``str``, decoded using the "gbk" encoding.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    resp = requests.get(url, timeout=timeout)
    # Force GBK decoding instead of whatever encoding requests would guess
    # (presumably the target site serves GBK pages -- confirm if it changes).
    resp.encoding = "gbk"
    return resp.text
def parse_and_download(html, save_dir="美女图片", timeout=10):
    """Download every "/uploads/" image referenced by a page into ``save_dir``.

    Each ``<img>`` tag whose ``src`` contains "/uploads/" is resolved against
    the site root, printed, fetched, and written to ``save_dir`` under the
    base name of its URL path.

    Args:
        html: HTML source of the page to scan.
        save_dir: Directory the images are written to; created when missing.
        timeout: Seconds to wait on each image request.

    Returns:
        None. The results are the files written to disk and the printed URLs.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin, urlsplit

    # Nothing visible in this file creates the directory, so open() below
    # would otherwise fail with FileNotFoundError on a first run.
    os.makedirs(save_dir, exist_ok=True)

    soup = BeautifulSoup(html, "html.parser")
    for img in soup.find_all("img"):
        # img["src"] raises KeyError for <img> tags that have no src attribute.
        src = img.get("src")
        if not src or "/uploads/" not in src:
            continue

        # urljoin yields the same URL as plain concatenation for root-relative
        # paths, and additionally copes with src values that are already absolute.
        src = urljoin("https://pic.netbian.com", src)
        print(src)

        # Derive the local file name from the URL path only, so a query string
        # never ends up in the file name.
        filename = os.path.basename(urlsplit(src).path)
        if not filename:
            continue

        # Fetch before opening the file: a failed request must not leave an
        # empty file behind, and an error page must not be saved as an image.
        try:
            resp_img = requests.get(src, timeout=timeout)
            resp_img.raise_for_status()
        except requests.RequestException as exc:
            # Report and move on so one bad image does not abort the crawl.
            print(f"skipped {src}: {exc}")
            continue

        with open(os.path.join(save_dir, filename), "wb") as f:
            f.write(resp_img.content)
# First and last listing pages to crawl (edit the range by hand as needed).
# The first page lives at the section root; later pages are index_<n>.html.
# Note: range(2, 2) is empty, so as written only the first page is crawled.
urls = ["https://pic.netbian.com/4kmeinv/"]
urls.extend(
    f"https://pic.netbian.com/4kmeinv/index_{i}.html"
    for i in range(2, 2)
)

# Crawl each listing page in order: fetch its HTML, then download its images.
for url in urls:
    print("##########正在爬取:", url)
    html = craw_html(url)
    parse_and_download(html)
# Article title: "Python爬取网络图片" (Crawling web images with Python)
# First published 2023-12-29 14:45:15