爬取图片并下载到本地时,总有一些图片显示不完整,如下图所示。
检测这种图片的一个比较好的方法,是检查 JPG/JPEG 文件末尾的结束标记(EOI)。
完整的 JPG、JPEG 文件均以 \xff\xd9 这两个字节结尾;如果文件末尾不是这两个字节,说明图片很可能没有下载完整。
代码如下
import os
import shutil
from PIL import Image
def is_valid_jpg(jpg_file):
    """Return True if *jpg_file* ends with the JPEG end-of-image marker.

    A complete JPEG stream is terminated by the two bytes ``FF D9``. A
    download that was cut short usually lacks them, so checking the last two
    bytes is a cheap completeness test that avoids decoding the image.

    Args:
        jpg_file: Path of the file to inspect.

    Returns:
        True when the last two bytes of the file are ``b'\\xff\\xd9'``;
        False otherwise, including for files shorter than two bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(jpg_file, 'rb') as f:
        # Seeking to -2 from the end raises OSError on files shorter than
        # two bytes (e.g. an empty download), so check the size first.
        f.seek(0, os.SEEK_END)
        if f.tell() < 2:
            return False
        f.seek(-2, os.SEEK_END)
        return f.read(2) == b'\xff\xd9'
def is_valid_pic(pic_file):
    """Return True only when *pic_file* is a JPEG that passes the trailer check.

    The file type is decided from the file name alone: names ending in
    ``.jpg`` or ``.jpeg`` (in any letter case) are handed to
    ``is_valid_jpg``. Every other name yields False, which means "not
    confirmed by this quick check", not "known to be broken".

    Args:
        pic_file: Path of the picture file, as a string.

    Returns:
        The result of ``is_valid_jpg(pic_file)`` for JPEG file names,
        False for anything else.
    """
    # Match case-insensitively and include the dot, so that 'IMG.JPG'
    # qualifies while a name that merely ends in the letters 'jpeg' does not.
    if pic_file.lower().endswith(('.jpg', '.jpeg')):
        return is_valid_jpg(pic_file)
    return False
# Folder to scan. The value is a placeholder meaning "path of the image
# folder to check"; replace it with a real directory before running.
curDir = '需要检测的图片的文件夹路径'

# Walk the whole tree and print the path of every file that looks broken.
for root, _dirs, files in os.walk(curDir):
    for filename in files:
        pic_file = os.path.join(root, filename)

        # Fast path: a JPEG whose trailer is intact needs no further check.
        if is_valid_pic(pic_file):
            continue

        # Slow path: let Pillow decode the file completely. The context
        # manager makes sure the file handle is released even when
        # decoding fails part-way through.
        try:
            with Image.open(pic_file) as img:
                img.load()
        except Exception:
            # Deliberately broad: any failure to open or decode marks the
            # file as broken. Files Pillow cannot identify as images are
            # reported here as well.
            print(pic_file)
            # Optional follow-up for a broken file: back it up with
            # shutil.copy(pic_file, <backup folder>) and then delete it
            # with os.remove(pic_file).