#coding:gbk
import os
from statistics import quantiles
import requests
import re #正则表达式,进行文字匹配
import urllib.request,urllib.error #制定URL,获取网页数据
from bs4 import BeautifulSoup #网页解析,获取数据
# Precompiled pattern describing the matching rule: it captures the href value
# of a <link> tag whose href attribute is directly followed by itemprop.
findlink = re.compile(r'<link href="(.*?)" itemprop=')
def main():
    """Collect the image links from the start page and download each one.

    The entries returned by ``getDate`` are processed from the last one
    back to the first.
    """
    start_page = "https://www.hippopx.com/zh"
    entries, remaining = getDate(start_page)
    # Walk the collected entries backwards, one download per entry.
    for position in range(remaining - 1, -1, -1):
        Download(entries[position][0])  # save the image to disk
def getDate(baseurl):
    """Extract image links from the page at ``baseurl``.

    The page is fetched with ``askURL`` and parsed with BeautifulSoup; every
    ``<li>`` element is rendered back to markup and searched with the
    module-level ``findlink`` pattern.

    Args:
        baseurl: Address of the page to scan.

    Returns:
        A ``(datelist, count)`` tuple. ``datelist`` holds one single-item
        list per ``<li>`` that produced a match, containing the first link
        found in that element; ``count`` is the number of such elements.
    """
    page = askURL(baseurl)
    parsed = BeautifulSoup(page, "html.parser")
    # All regex hits per <li>; elements without a hit yield an empty list.
    hits_per_item = (findlink.findall(str(node)) for node in parsed.select("li"))
    # Keep only the first link of each element that matched at all.
    datelist = [[hits[0]] for hits in hits_per_item if hits]
    return datelist, len(datelist)
def askURL(url):
    """Return the body of ``url`` decoded as UTF-8, or ``""`` on failure.

    The request is sent with a desktop-browser User-Agent header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text. An empty string is returned when the request
        fails with ``urllib.error.URLError`` or when the body is not valid
        UTF-8; the error details are printed in both cases.
    """
    # Browser-like header sent along with the request.
    head = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763"}
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        # The context manager closes the response even if read/decode raises.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    except UnicodeDecodeError as e:
        # A body that is not UTF-8 is reported like any other failed fetch
        # instead of aborting the caller with an unhandled exception.
        print(e)
    return html
def Download(Url, save_dir='D:\\B\\'):
    """Download the image at ``Url`` into ``save_dir`` unless already present.

    The file is named after the last ``/``-separated segment of the URL.
    The outcome is reported by printing a status message; nothing is
    returned.

    Args:
        Url: Address of the image to download.
        save_dir: Directory that receives the file; it is created when
            missing. Defaults to the directory this script has always used.
    """
    path = os.path.join(save_dir, Url.split('/')[-1])
    try:
        os.makedirs(save_dir, exist_ok=True)
        if os.path.exists(path):
            print("图片已存在")
            return
        # A timeout keeps one stalled server from hanging the whole run.
        r = requests.get(Url, timeout=30)
        r.raise_for_status()
        with open(path, 'wb') as f:
            f.write(r.content)
        print("图片保存成功")
    except (requests.RequestException, OSError):
        # Only network and filesystem errors count as a failed download;
        # KeyboardInterrupt and programming errors are left to propagate.
        print("图片获取失败")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
利用 Python 爬取图片并下载到本地
于 2022-06-05 21:36:58 首次发布