【人脸识别】人脸数据集汇总

Color RGB

color FERET Database

处理 color FERET Database 数据：每个 ppm 文件都以 bz2 格式压缩，因此需要先解压，再转换为 png 格式

python处理代码:

import bz2
import os

from PIL import Image

# Root folder of the colorFERET distribution whose .bz2 files are converted.
rootdir = r'C:\Users\xxx\Downloads\colorFERETDatabase\colorferet'

# For every leaf directory: decompress each "*.bz2" file, convert the
# decompressed image to PNG, then delete both the intermediate file and the
# original archive.
for folder_path, dirs, fns in os.walk(rootdir):
    # Only directories without sub-directories are processed.
    if dirs:
        continue
    for filename in fns:
        if not filename.endswith('.bz2'):
            continue
        print(folder_path, dirs, filename)

        file_path = os.path.join(folder_path, filename)
        # Drop the 4-character ".bz2" suffix to get the decompressed name.
        output_path = os.path.join(folder_path, filename[:-4])

        with bz2.open(file_path, 'rb') as f:
            data = f.read()
        with open(output_path, 'wb') as f:
            f.write(data)

        # Close the image before deleting its file; an open handle makes
        # os.remove fail on Windows.
        png_path = os.path.splitext(output_path)[0] + '.png'
        with Image.open(output_path) as ppm:
            ppm.save(png_path)

        # Never delete the file we just produced if the names coincide.
        if png_path != output_path:
            os.remove(output_path)
        os.remove(file_path)

IMDB_face dataset

IMDb-Face is a new large-scale, noise-controlled dataset for face recognition research. It contains about 1.7 million faces of 59k identities, manually cleaned from 2.0 million raw images, all obtained from the IMDb website. A detailed introduction to IMDb-Face can be found in the paper (https://arxiv.org/abs/1807.11649).

python处理代码:

import os
import csv
import requests
from multiprocessing.dummy import Pool as ThreadPool
from PIL import Image

def remove_empty_directories(directory):
    """Delete every empty sub-directory found beneath ``directory``.

    The tree is walked bottom-up, so a folder that only contained empty
    folders is itself removed once its children are gone. ``directory``
    itself is never deleted. Each removal is reported on stdout.
    """
    for parent, subdir_names, _files in os.walk(directory, topdown=False):
        candidates = (os.path.join(parent, name) for name in subdir_names)
        for candidate in candidates:
            if os.listdir(candidate):
                # Still has content: keep it.
                continue
            os.rmdir(candidate)
            print(f"Deleted empty folder: {candidate}")

def download_image(image_url, save_path, bbox, timeout=30):
    """Download ``image_url`` to ``save_path``, reporting the outcome on stdout.

    The body is streamed into ``save_path + ".part"`` and only renamed to
    ``save_path`` once the transfer has completed, so an interrupted download
    never leaves a truncated image behind. Failures are printed, not raised,
    so one bad URL does not abort the other downloads in a worker pool.

    Args:
        image_url: URL of the image to fetch.
        save_path: Destination file path; its directory must already exist.
        bbox: Unused here; accepted so the function can be called with the
            same ``(url, path, bbox)`` tuples as ``crop_and_save_image``.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block a worker forever.
    """
    partial_path = save_path + ".part"
    try:
        # The context manager releases the connection even on early exit.
        with requests.get(image_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, save_path)
        print(f"Downloaded {save_path}")
    except Exception as e:
        # Worker boundary: log and carry on with the remaining downloads.
        print(f"Failed to download {save_path}: {str(e)}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing was written (or it is already gone); nothing to clean.
            pass


def crop_and_save_image(image_url, save_path, bbox):
    """Crop the image stored at ``save_path`` to ``bbox`` and overwrite it.

    The first four items of ``bbox`` are handed to ``Image.crop`` as the crop
    box. ``image_url`` is not used. Any error is printed rather than raised.
    """
    try:
        with Image.open(save_path) as source:
            box = tuple(bbox[k] for k in range(4))
            face = source.crop(box)
            face.save(save_path)
        print(f"Saved {save_path}")
    except Exception as err:
        print(f"Failed to save {save_path}: {err}")


def download_dataset(csv_file, data_dir, num_processes=8):
    """Download every image listed in ``csv_file`` into ``data_dir``.

    The CSV must have the columns ``url``, ``name``, ``image`` and ``rect``.
    Each image is saved as ``data_dir/<name>/<image>``. Downloads run in a
    thread pool; afterwards, folders that ended up empty are removed.

    Args:
        csv_file: Path of the CSV index file.
        data_dir: Output root directory; created if missing.
        num_processes: Number of worker threads used for downloading.

    Raises:
        KeyError: If a required column is missing from the CSV.
        ValueError: If a ``rect`` field contains a non-integer token.
    """
    os.makedirs(data_dir, exist_ok=True)

    url_list = []
    # newline="" is what the csv module expects from the file it reads.
    with open(csv_file, "r", newline="") as file:
        for row in csv.DictReader(file):
            # "rect" holds whitespace-separated integers.
            bbox = [int(value) for value in row['rect'].split()]

            save_dir = os.path.join(data_dir, row['name'])
            # exist_ok must be passed by keyword: the second positional
            # argument of os.makedirs is the permission mode.
            os.makedirs(save_dir, exist_ok=True)

            save_path = os.path.join(save_dir, row['image'])
            url_list.append((row['url'], save_path, bbox))
    print('csv loaded')
    print('len : ', len(url_list))

    pool = ThreadPool(num_processes)
    try:
        pool.starmap(download_image, url_list)
    finally:
        # Always shut the workers down, even if starmap raised.
        pool.close()
        pool.join()

    remove_empty_directories(data_dir)


# Script entry: fetch all images indexed by the IMDb-Face CSV into a local
# folder, using 8 download threads.
csv_file = "IMDb-Face.csv"
save_dir = "IMDB_temp"
download_dataset(csv_file=csv_file, data_dir=save_dir, num_processes=8)

NIR

CBSR NIR Face Dataset

http://vcipl-okstate.org/pbvs/bench/Data/07/download.html

下载即可用

mp4抽帧保存为png

import cv2
import os

# Directory holding the input video files.
video_path = "video"
# Directory under which one sub-folder of PNG frames is written per video.
save_folder = "png"

os.makedirs(save_folder, exist_ok=True)

# For each file in video_path, save every `step`-th frame as a PNG, where
# `step` is derived from the reported frame count (frame_count // 1200 + 1).
for mp4 in os.listdir(video_path):
    cap = cv2.VideoCapture(os.path.join(video_path, mp4))
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(mp4, frame_count)

        # Entries that cannot be opened as a video are skipped.
        if not cap.isOpened():
            continue

        # Clamp at 0 so a negative reported count cannot yield a step of 0
        # (which would make the modulo below raise ZeroDivisionError).
        step = max(frame_count, 0) // 1200 + 1

        # splitext strips only the final extension, so names with several
        # dots (or none) map to a sensible folder name.
        frame_dir = os.path.join(save_folder, os.path.splitext(mp4)[0])
        os.makedirs(frame_dir, exist_ok=True)

        i = 0
        while True:
            ret, frame = cap.read()
            # A failed read marks the end of the video.
            if not ret:
                break
            i += 1
            if i % step == 0:
                # NOTE(review): the file number is the 1-based frame index
                # plus one; kept as-is so existing output names stay stable.
                cv2.imwrite(os.path.join(frame_dir, f"{i+1:06d}.png"), frame)
    finally:
        # Release the capture even if reading or writing failed.
        cap.release()

更多数据集欢迎在评论区补充…

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

yddcs

你的鼓励--创作的动力!!!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值