1. code
方式1:
import requests
import re
from time import sleep
class MyImage():
    """Download the thumbnails that Baidu image search returns for a keyword.

    One ``requests.Session`` carrying a fixed User-Agent header is shared by
    the search request and by every image download.
    """

    _SEARCH_URL = "https://image.baidu.com/search/index"

    # Thumbnail URL up to (not including) its ".jpg" suffix. The capture is
    # confined to a single quoted value and the dot is escaped, so a match
    # cannot run into a neighbouring field or accept something like "=jpg".
    _THUMB_URL_RE = re.compile(r'thumbURL":"([^"]*?)\.jpg"')
    _PAGE_TITLE_RE = re.compile(r'"fromPageTitle":"(.*?)"')

    # Characters deleted from a page title before it is used as a file name.
    # Built once instead of on every loop iteration.
    # NOTE(review): the trailing letters "strongxp" look like an attempt to
    # drop <strong> markup, but they remove those letters everywhere in the
    # title. The set is kept unchanged so existing file names stay the same.
    _TITLE_CLEANUP = str.maketrans(
        "", "", "0123456789【】!@#$%^&*()[]{};:,./<>?\\|`~-=_+strongxp "
    )

    def __init__(self, keyword):
        """Create the shared HTTP session and remember the search keyword.

        Args:
            keyword: Text sent to the image search as the ``word`` parameter.
        """
        headers = {"User-Agent": "Python-urllib/2.6"}
        self.session = requests.Session()
        self.session.headers.update(headers)
        self.keyword = keyword

    def get_all_image_info(self):
        """Fetch the search page and pair each page title with a thumbnail URL.

        Titles and URLs are matched by their order of appearance in the page.
        When the page yields more of one than of the other, the surplus
        entries are ignored instead of raising ``IndexError``.

        Returns:
            A list of ``{"desc": cleaned_title, "url": thumbnail_url}`` dicts.

        Raises:
            requests.HTTPError: The search page did not answer with HTTP 200.
        """
        params = {"word": self.keyword, "tn": "baiduimage"}
        res = self.session.get(self._SEARCH_URL, params=params)
        # An explicit check: a bare ``assert`` disappears under ``python -O``.
        if res.status_code != 200:
            raise requests.HTTPError(
                f"image search returned HTTP {res.status_code}", response=res
            )
        image_urls = self._THUMB_URL_RE.findall(res.text)
        image_names = self._PAGE_TITLE_RE.findall(res.text)
        return [
            {
                "desc": name.translate(self._TITLE_CLEANUP).strip(),
                "url": f"{url}.jpg",
            }
            for name, url in zip(image_names, image_urls)
        ]

    def download_image(self, image_name, image_url):
        """Download one image and write it to ``<image_name>.jpg``.

        Args:
            image_name: File name without extension, relative to the current
                working directory.
            image_url: Address the image bytes are fetched from.
        """
        res = self.session.get(image_url)
        data = res.content
        with open(f"{image_name}.jpg", "wb") as file_object:
            # Write the raw image bytes.
            file_object.write(data)
        # Pause briefly between downloads.
        sleep(0.5)

    def download_all_image(self):
        """Download every image listed by ``get_all_image_info``."""
        for item in self.get_all_image_info():
            self.download_image(item["desc"], item["url"])
if __name__ == "__main__":
    # Script entry point: download every image found for the keyword
    # "壁纸" (wallpaper).
    crawler = MyImage("壁纸")
    crawler.download_all_image()
补充
#正则表达式匹配图片
import re
import requests
# Fetch the Baidu home page; fail early on an HTTP error status.
url = "https://www.baidu.com/"
res = requests.get(url)
res.raise_for_status()
res.encoding = "utf8"  # set from the encoding the HTML declares: charset=utf-8
html_str = res.text

# Capture the src value of an <img> tag that is followed by " width".
pat = '<img.*src=(.*?) width'
result = re.findall(pat, html_str)  # e.g. ['//www.baidu.com/img/bd_logo1.png']
if not result:
    # Without this guard an unexpected page layout surfaces as a bare
    # IndexError on result[0].
    raise ValueError(f"no image matching {pat!r} found at {url}")
image_url = f'https:{result[0]}'  # e.g. https://www.baidu.com/img/bd_logo1.png

# Download the image.
res = requests.get(image_url)
res.raise_for_status()  # do not save an error page as baidu_logo.png
# The body is binary image data, so report its size instead of printing
# res.text, which would decode the bytes as text.
print(f"downloaded {len(res.content)} bytes from {image_url}")
with open("baidu_logo.png", "wb") as fp:  # "wb": open for binary writing
    fp.write(res.content)  # res.content is bytes; res.text is a decoded str
方式2:
import requests
from PIL import Image
from io import BytesIO
import os
# Download a PNG and re-save it through Pillow.
url = "https://www.baidu.com/img/PCtm_d9c8750bed0b3c7d089fa7d55720d6cf.png"
# TLS certificate verification is left at the requests default (enabled).
# Passing verify=False would accept any certificate for this HTTPS host.
res = requests.get(url)
res.raise_for_status()  # an error page is not an image; fail before decoding
# The context manager closes the image once it has been written out.
with Image.open(BytesIO(res.content)) as i:
    # NOTE(review): quality looks like a JPEG option and probably has no
    # effect on a .png target - confirm against the Pillow PNG save options.
    i.save(os.path.join(r'C:\Users\SDS\eclipse-workspace\shujufenxi\src\n', 'image.png'), quality=85)