这一部分主要以生成图片为例。
数据集:
1)https://pan.baidu.com/s/1kVSA8z9 (密码: atqm)
2)https://pan.baidu.com/s/1ctbd9O (密码: kubu)
1)数据的预处理:
将下载的图片放到一个girls文件夹下,然后依次进行压缩(缩放)和去重复操作。
yasuo.py: 跳过读取失败或宽、高小于200像素的图片,其余图片被缩放成64*64的格式并保存到little_girls文件夹
# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
# Source folder with the downloaded pictures and destination folder for the
# 64x64 thumbnails.
image_dir = 'girls'
new_girl_dir = 'little_girls'

# Create the destination folder if it does not exist yet.
if not os.path.exists(new_girl_dir):
    os.makedirs(new_girl_dir)

for name in os.listdir(image_dir):
    picture = cv2.imread(os.path.join(image_dir, name))
    if picture is None:
        # The entry could not be loaded as an image; report it and move on.
        print("image read fail")
        continue

    rows, cols, depth = picture.shape
    # Keep only 3-channel images that are at least 200 px on both sides;
    # anything else is skipped silently.
    if rows >= 200 and cols >= 200 and depth == 3:
        target = os.path.join(new_girl_dir, name)
        # Scale the image to 64x64 and store it under the same file name.
        cv2.imwrite(target, cv2.resize(picture, (64, 64)))
        print(target)
remove_chongfu.py: 删除重复的图片(用cpu跑的话非常慢,最好用gpu)
# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
def is_same_image(img_file1, img_file2):
    """Return True when the two image files decode to identical pixel data.

    Both paths are loaded with ``cv2.imread``. The result is False if either
    file cannot be loaded, if the decoded arrays differ in shape, or if any
    element differs between them.
    """
    first = cv2.imread(img_file1)
    second = cv2.imread(img_file2)

    # An unreadable file never counts as a duplicate.
    if first is None or second is None:
        return False

    # Arrays of different shape cannot be compared element-wise.
    if first.shape != second.shape:
        return False

    # XOR is zero everywhere exactly when the two arrays are equal.
    return not np.bitwise_xor(first, second).any()
#去重复照片
file_list = os.listdir('little_girls')
try:
for img1 in file_list:
print((len