前段时间做实验用到了fer2013数据集,而fer2013数据集中存在一些非人脸、标签错误的图像。为了提高实验准确性,我将fer2013转换成图片,进行筛选删除后,再把新的数据集转换成与fer2013格式相同的csv数据集。
即涉及到将csv文件转换成图片以及将图片转换成像素写csv的过程。
由于在下是python新手,所以上网查了很多程序才基本实现,有不足望指出。
1.将fer2013转换成图片
这一步网上已经有不少人写过,可以参考下面这篇文章给出的程序:https://www.jianshu.com/p/edbffccb3743(包括了数据集下载)
代码如下:
import pandas as pd
import numpy as np
import scipy.misc as sm
import os
# Maps the FER2013 numeric emotion label (as a string) to the name of the
# folder that images of that class are stored in.
emotions = {
    str(label): folder
    for label, folder in enumerate(
        (
            'anger',      # 0: angry
            'disgust',    # 1: disgusted
            'fear',       # 2: fearful
            'happy',      # 3: happy
            'sad',        # 4: sad
            'surprised',  # 5: surprised
            'normal',     # 6: neutral
        )
    )
}
def createDir(dir):
    """Create the directory ``dir`` (and any missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        dir: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
    """
    # exist_ok=True avoids the race between a separate "exists" check and
    # the actual creation.
    os.makedirs(dir, exist_ok=True)
def saveImageFromFer2013(file):
    """Export every row of a FER2013-style CSV as a 48x48 JPEG image.

    Columns are read by position: 0 is the emotion label, 1 is the string
    of space-separated pixel values and 2 is the usage, which becomes the
    top-level output folder. Each image is written to
    ``<usage>/<emotion name>/<row index>.jpg`` relative to the current
    working directory, and the number of exported images is printed at
    the end.

    Args:
        file: Path of the CSV file to read.
    """
    # Read the CSV file.
    faces_data = pd.read_csv(file)
    imageCount = 0
    # Walk the rows in file order; `index` is the 0-based row position and
    # is used as the image file name.
    for index, row in enumerate(faces_data.itertuples(index=False)):
        emotion_data, image_data, usage_data = row[0], row[1], row[2]
        # Turn the pixel string into a 48x48 array.
        image = np.asarray(image_data.split(), dtype=float).reshape(48, 48)
        # Output folder: <usage>/<emotion name>; parents are created too.
        imagePath = os.path.join(usage_data, emotions[str(emotion_data)])
        createDir(imagePath)
        imageName = os.path.join(imagePath, str(index) + '.jpg')
        # NOTE: scipy.misc.toimage needs Pillow and was removed in
        # SciPy 1.2, so this call only works with an older SciPy.
        sm.toimage(image).save(imageName)
        # Count images actually written (the last index is one too few).
        imageCount += 1
    print('总共有' + str(imageCount) + '张图片')
# Script entry point: export the images only when run directly.
if __name__ == "__main__":
    saveImageFromFer2013("fer2013.csv")
但是我并没有成功,老是报错,这是因为我没有安装pillow。于是我找到了新的程序,改用opencv库重新实现,并做了一些修改,参考https://blog.csdn.net/fl1623863129/article/details/106236615
代码如下:
import pandas as pd
import numpy as np
import scipy.misc as sm
import os
# Maps the FER2013 numeric emotion label (as a string) to the name of the
# folder that images of that class are stored in.
emotions = {
    str(label): folder
    for label, folder in enumerate(
        (
            'anger',      # 0: angry
            'disgust',    # 1: disgusted
            'fear',       # 2: fearful
            'happy',      # 3: happy
            'sad',        # 4: sad
            'surprised',  # 5: surprised
            'normal',     # 6: neutral
        )
    )
}
def createDir(dir):
    """Create the directory ``dir`` (and any missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        dir: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
    """
    # exist_ok=True avoids the race between a separate "exists" check and
    # the actual creation.
    os.makedirs(dir, exist_ok=True)
def saveImageFromFer2013(file):
    """Export every row of a FER2013-style CSV as a 48x48 JPEG via OpenCV.

    Columns are read by position: 0 is the emotion label, 1 is the string
    of space-separated pixel values and 2 is the usage, which becomes the
    top-level output folder. Each image is written to
    ``<usage>/<emotion name>/<row index>.jpg`` relative to the current
    working directory, and the number of exported images is printed at
    the end.

    Args:
        file: Path of the CSV file to read.

    Raises:
        IOError: If OpenCV fails to write one of the images.
    """
    # Imported here so the OpenCV dependency stays local to this function;
    # it replaces scipy.misc.toimage, which was removed in SciPy 1.2.
    import cv2

    # Read the CSV file.
    faces_data = pd.read_csv(file)
    imageCount = 0
    # Walk the rows in file order; `index` is the 0-based row position and
    # is used as the image file name.
    for index, row in enumerate(faces_data.itertuples(index=False)):
        emotion_data, image_data, usage_data = row[0], row[1], row[2]
        # Turn the pixel string into a 48x48 array.
        image = np.asarray(image_data.split(), dtype=float).reshape(48, 48)
        # Output folder: <usage>/<emotion name>; parents are created too.
        imagePath = os.path.join(usage_data, emotions[str(emotion_data)])
        createDir(imagePath)
        imageName = os.path.join(imagePath, str(index) + '.jpg')
        # Written as 8-bit grayscale; assumes pixel values are already in
        # 0-255 -- TODO confirm for CSVs other than fer2013.
        if not cv2.imwrite(imageName, image.astype(np.uint8)):
            raise IOError('failed to write image: ' + imageName)
        # Count images actually written (the last index is one too few).
        imageCount += 1
    print('总共有' + str(imageCount) + '张图片')
# Script entry point: export the images only when run directly.
if __name__ == "__main__":
    saveImageFromFer2013("fer2013.csv")
这样就能得到fer2013里的图片了。生成的文件包括三个文件夹,分别是PrivateTest、PublicTest以及Training。三个文件夹里都分别包含七个子文件夹,即7种表情。由此就可以对数据集进行筛选删除,去除非人脸、标签错误或者表情模糊的图像。
数据集删除完成后,需要生成与fer2013一样的格式的数据集方便后续使用。
即需要将所有图片转换成像素值表示,并且标明标签以及属于哪一个数据集。
2. 批量读取图片转化成像素值并以csv格式保存
import cv2
import os
import numpy as np
import pandas as pd
import sys

# Print arrays and data frames in full instead of abbreviating them.
# NumPy rejects threshold=np.nan with a ValueError; sys.maxsize is the
# supported way to ask for an untruncated representation.
np.set_printoptions(threshold=sys.maxsize)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import csv
def get_imlist(path):
    """Return the paths of all ``.jpg`` files directly inside ``path``.

    The result is sorted by file name so that repeated runs visit the
    images in the same order; ``os.listdir`` alone gives no ordering
    guarantee.

    Args:
        path: Directory to list.

    Returns:
        A list of ``os.path.join(path, name)`` strings, one per file whose
        name ends with ``.jpg``.
    """
    return [
        os.path.join(path, name)
        for name in sorted(os.listdir(path))
        if name.endswith('.jpg')
    ]
# Rebuild a FER2013-style CSV ("fer2020.csv") from the cleaned image
# folders. Images are read from <usage>/<emotion folder>/*.jpg and each one
# becomes a row: emotion label, space-separated pixel values, usage.

# Emotion folder name -> label written to the CSV, in label order.
EMOTION_FOLDERS = (
    ('anger', '0'),
    ('disgust', '1'),
    ('fear', '2'),
    ('happy', '3'),
    ('sad', '4'),
    ('surprised', '5'),
    ('normal', '6'),
)
# Dataset splits, in the order their rows are written.
USAGES = ('Training', 'PrivateTest', 'PublicTest')

with open("fer2020.csv", 'w', newline='') as f:
    # A single writer serves the whole file, so the header and the data
    # rows share the same line terminator.
    csv_writer = csv.writer(f)
    # FER2013 names the middle column "pixels", not "pixel".
    csv_writer.writerow(['emotion', 'pixels', 'Usage'])
    for usage in USAGES:
        for folder, label in EMOTION_FOLDERS:
            for image_file in get_imlist(os.path.join(usage, folder)):
                # Load the image as a single-channel grayscale array.
                img = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # cv2.imread signals an unreadable file with None.
                    raise ValueError('cannot read image: ' + image_file)
                # Force 48x48 and flatten row by row; the pixel values are
                # written as they are, without any normalisation.
                data = cv2.resize(img, (48, 48)).reshape(-1)
                pixels = " ".join(str(value) for value in data)
                csv_writer.writerow([label, pixels, usage])
这样就完成了对fer2013的筛选清洗。
有不足之处希望大家指出,谢谢。