import requests
from tqdm import tqdm
# Script 1: fetch 100 images from the random-image API and store them as
# img\0.jpg ... img\99.jpg. Duplicate images are not detected here.
# NOTE: open() does not create directories, so the "img" folder must exist.
url = "https://iw233.cn/API/Ghs.php"  # loop-invariant, so defined once
for i in tqdm(range(0, 100)):
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Skip HTTP error responses instead of saving an error page as .jpg.
        continue
    # The context manager flushes and closes the file handle right away.
    with open(r"img\{0}.jpg".format(i), "wb") as f:
        f.write(response.content)
这样爬下来的图片可能有重复。进一步修改:计算每张图片的 MD5,用来判断图片是否重复。
import requests
from tqdm import tqdm
import hashlib
# Script 2: keep downloading until 100 *distinct* images have been saved.
# Two images count as identical when the MD5 of their raw bytes matches.
# NOTE: open() does not create directories, so the "img" folder must exist.
url = "https://iw233.cn/API/Ghs.php"
total = 100
imghash = set()  # MD5 digests of the images saved so far
i = 0            # number of images saved; also the next file index
# One progress bar for the whole run, advanced only when an image is saved.
# (The former `while i in tqdm(range(100))` built a new bar per iteration.)
with tqdm(total=total) as progress:
    while i < total:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
        if not response.ok:
            # Retry on HTTP errors rather than hashing/saving an error page.
            continue
        img = response.content
        md5 = hashlib.md5(img).hexdigest()
        if md5 in imghash:
            print("出现重复")
            continue
        imghash.add(md5)
        # The context manager flushes and closes the file handle right away.
        with open(r"img\{0}.jpg".format(i), "wb") as f:
            f.write(img)
        print("正在保存图片 : {0}.jpg".format(i))
        i = i + 1
        progress.update(1)
再加点料:根据用户输入的图片数量来下载。
import requests
from tqdm import tqdm
import hashlib
# Script 3: like the de-duplicating downloader, but the number of distinct
# images to save is read from standard input.
# NOTE: open() does not create directories, so the "img" folder must exist.
url = "https://iw233.cn/API/Ghs.php"
imghash = set()  # MD5 digests of the images saved so far
i = 0            # number of images saved; also the next file index
print("请问需要多少张二刺螈图片:")
a = int(input())  # raises ValueError if the input is not an integer
# One progress bar for the whole run, advanced only when an image is saved.
# (The former `while i in tqdm(range(a))` built a new bar per iteration.)
with tqdm(total=a) as progress:
    while i < a:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
        if not response.ok:
            # Retry on HTTP errors rather than hashing/saving an error page.
            continue
        img = response.content
        md5 = hashlib.md5(img).hexdigest()
        if md5 in imghash:
            print("出现重复")
            continue
        imghash.add(md5)
        # The context manager flushes and closes the file handle right away.
        with open(r"img\{0}.jpg".format(i), "wb") as f:
            f.write(img)
        print("正在保存图片 : {0}.jpg".format(i))
        i = i + 1
        progress.update(1)
再加点料:用 OpenCV 读取图片的宽高,只保留满足最小宽高要求的图片。
import requests
from tqdm import tqdm
import hashlib
import cv2
# Script 4: save `a` distinct images whose width and height are at least the
# values entered by the user. Dimensions are read with OpenCV; duplicates are
# detected by the MD5 of the raw bytes.
# NOTE: open() does not create directories, so the "img" folder must exist.
url = "https://iw233.cn/API/Ghs.php"
imghash = set()  # MD5 digests of the images saved so far
i = 0            # number of images saved; also the next file index
print("请问需要多少张二刺螈图片:")
a = int(input())
print("对宽的需求(至少)")
w = int(input())
print("对高的需求(至少)")
h = int(input())
# One progress bar for the whole run, advanced only when an image is saved.
# (The former `while i in tqdm(range(a))` built a new bar per iteration.)
with tqdm(total=a) as progress:
    while i < a:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
        if not response.ok:
            # Retry on HTTP errors rather than processing an error page.
            continue
        img = response.content
        # cv2.imread takes a file path, so stage the bytes in a scratch file.
        # Closing it via `with` guarantees the data is flushed before reading.
        with open(r"img\tmp.jpg", "wb") as f:
            f.write(img)
        imgcv2 = cv2.imread(r"img\tmp.jpg")
        if imgcv2 is None:
            # imread yields None when the payload cannot be decoded as an
            # image; skip it explicitly instead of relying on a bare except
            # that would also hide unrelated errors (e.g. failed writes).
            continue
        height, width = imgcv2.shape[:2]
        print(width, height)
        # The prompts ask for "at least" w x h, so the bounds are inclusive.
        if width < w or height < h:
            continue
        md5 = hashlib.md5(img).hexdigest()
        if md5 in imghash:
            print("出现重复")
            continue
        imghash.add(md5)
        with open(r"img\{0}.jpg".format(i), "wb") as f:
            f.write(img)
        print("正在保存图片 : {0}.jpg".format(i))
        i = i + 1
        progress.update(1)