生成背景图片(负样本):
import os
import PIL.Image as image
# Build the negative (background-only) samples: each image found in bg_dir is
# converted to RGB, resized to 224x224 and written to train_dir.
x = 1  # running sample index, used as the first field of the file name
bg_dir = r"datasets"
train_dir = r"train_img"
test_dir = r"test_img"

# Image.save() does not create missing directories.
os.makedirs(train_dir, exist_ok=True)

for filename in os.listdir(bg_dir):
    # Close the source file as soon as the resized copy exists.
    with image.open(os.path.join(bg_dir, filename)) as empty_image:
        resize_image = empty_image.convert("RGB").resize((224, 224))

    # File name layout: <index>.<x1>.<y1>.<x2>.<y2>.<confidence>.png
    # A background image carries an all-zero box and confidence 0.
    sample_name = "{0}.0.0.0.0.0.png".format(x)
    # os.path.join keeps the path portable; the former "{0}\{1}" format only
    # produced a directory separator on Windows.
    resize_image.save(os.path.join(train_dir, sample_name))
    # To build the test set instead, save into test_dir:
    # resize_image.save(os.path.join(test_dir, sample_name))

    x = x + 1
    # The counter is incremented before this check, so at most 499 images
    # are written.
    if x == 500:
        break
生成训练样本(正样本):
import os
import numpy as np
import matplotlib.image as image
import PIL.Image as pimage
import PIL.ImageDraw as imagedraw
# Build the positive samples: a randomly rotated and resized foreground image
# from yellow_dir is pasted at a random position onto each 224x224 background.
# The box corners are encoded in the output file name and logged to txt_dir.
bg_dir = r"datasets"
yellow_dir = r"yellow"
train_dir = r"train_img"
test_dir = r"test_img"
txt_dir = r"./x.txt"

x = 1  # running sample index, used as the first field of the file name

# Image.save() does not create missing directories.
os.makedirs(train_dir, exist_ok=True)

with open(txt_dir, "w") as f:
    for filename in os.listdir(bg_dir):
        # os.listdir yields the complete file name, extension included.
        with pimage.open(os.path.join(bg_dir, filename)) as background:
            background_resize = background.convert("RGB").resize((224, 224))

        # Foreground images are addressed by number: 1.png ... 20.png.
        name = np.random.randint(1, 21)
        with pimage.open(os.path.join(yellow_dir, "{0}.png".format(name))) as yellow_img:
            rot_img = yellow_img.rotate(np.random.randint(-45, 45))

        # Random foreground size, then a random top-left corner chosen so the
        # pasted image stays fully inside the 224x224 background.
        new_w = np.random.randint(50, 100)
        new_h = np.random.randint(50, 100)
        resize_img = rot_img.resize((new_w, new_h))
        paste_x1 = np.random.randint(0, 224 - new_w)
        paste_y1 = np.random.randint(0, 224 - new_h)

        # Paste through the alpha channel so transparent pixels of the
        # foreground leave the background visible.
        resize_img = resize_img.convert("RGBA")
        alpha = resize_img.split()[3]
        background_resize.paste(resize_img, (paste_x1, paste_y1), mask=alpha)

        paste_x2 = paste_x1 + new_w
        paste_y2 = paste_y1 + new_h

        # <index>.<x1>.<y1>.<x2>.<y2>.<confidence>; confidence is 1 here.
        label = "{0}.{1}.{2}.{3}.{4}.1".format(x, paste_x1, paste_y1, paste_x2, paste_y2)
        f.write(label + "\n")
        # os.path.join keeps the path portable; the former "{0}\{1}" format
        # only produced a directory separator on Windows.
        background_resize.save(os.path.join(train_dir, label + ".png"))
        # To build the test set instead, save into test_dir:
        # background_resize.save(os.path.join(test_dir, label + ".png"))

        x = x + 1
        # The counter is incremented before this check, so at most 499
        # images are written.
        if x == 500:
            break
采样:
import os
import numpy as np
import tensorflow as tf
import time
# Directory that Sample.read_file scans for sample images.
train_dir = r"train_img"
import tensorflow.contrib.eager as tfe
class Sample:
    """Serve shuffled batches of images and labels read from ``train_dir``.

    Every file name is expected to look like
    ``<index>.<x1>.<y1>.<x2>.<y2>.<confidence>.<ext>``; the label is recovered
    from the name alone.
    """

    def read_file(self):
        """List ``train_dir`` and fill ``self.filenames`` and ``self.labels``.

        Each label is ``[x1, y1, x2, y2, confidence]`` with the four
        coordinates divided by 224. All five entries are stored as strings,
        so callers receive the labels as a string array.

        Raises:
            IndexError, ValueError: if a file name does not follow the layout.
        """
        self.filenames = []
        self.labels = []
        self.confidences = []  # never filled; kept because it was always set
        for filename in os.listdir(train_dir):
            data = filename.split(".")
            labels = [str(int(value) / 224) for value in data[1:5]]
            labels.append(data[5])
            # os.path.join keeps the path portable; the former "{0}\{1}"
            # format only produced a directory separator on Windows.
            self.filenames.append(os.path.join(train_dir, filename))
            self.labels.append(labels)

    def _parse_function(self, filename, label):
        """Read and decode one image file; the label passes through unchanged."""
        image_string = tf.read_file(filename)
        image_decode = tf.image.decode_image(image_string)
        return image_decode, label

    def _build_pipeline(self, batch_size):
        """Create the input pipeline and session for one batch size.

        The pipeline lives in its own graph, so fetching batches never adds
        operations to the caller's default graph.

        Returns:
            ``(session, batch)`` where ``batch`` is the iterator's
            ``get_next()`` tensor pair.

        Raises:
            ValueError: if ``train_dir`` contains no files.
        """
        self.read_file()
        if not self.filenames:
            raise ValueError("no sample images found in {0!r}".format(train_dir))
        graph = tf.Graph()
        with graph.as_default():
            data_set = tf.data.Dataset.from_tensor_slices((self.filenames, self.labels))
            # Shuffle the (cheap) file names with a buffer covering the whole
            # list; a buffer of only one batch would restrict every batch to
            # the first few files.
            data_set = data_set.shuffle(len(self.filenames))
            data_set = data_set.map(self._parse_function)
            data_set = data_set.repeat()  # cycle forever so batches are always full
            data_set = data_set.batch(batch_size)
            batch = data_set.make_one_shot_iterator().get_next()
        return tf.Session(graph=graph), batch

    def get_batch(self, set):
        """Return one batch of ``set`` samples as ``(images, labels)``.

        Args:
            set: batch size. The name shadows the builtin but is kept so
                existing callers keep working.

        Returns:
            ``images`` scaled from [0, 255] to [-1, 1], and ``labels`` as
            produced by ``read_file``.

        The pipeline for a given batch size is built on first use and then
        reused, so the directory is listed once per batch size rather than
        on every call.
        """
        pipelines = getattr(self, "_pipelines", None)
        if pipelines is None:
            pipelines = self._pipelines = {}
        if set not in pipelines:
            pipelines[set] = self._build_pipeline(set)
        session, batch = pipelines[set]
        image, label = session.run(batch)
        img = (image / 255 - 0.5) * 2
        return img, label

    def close(self):
        """Close every session opened by ``get_batch``."""
        for session, _ in getattr(self, "_pipelines", {}).values():
            session.close()
        self._pipelines = {}
# Module-level sampler instance; other scripts can import it by name.
sample_train = Sample()

if __name__ == "__main__":
    # Throughput check: time 100 batches of 100 images. Guarded so that
    # importing sample_train from another script does not trigger it.
    for i in range(100):
        a = time.time()
        print(np.shape(sample_train.get_batch(100)[0]))
        b = time.time()
        print(b - a)
搭建网络并进行训练:
import numpy as np
import tensorflow as tf
from random_sample_train import sample_train
from random_sample_test import sample_test
import PIL.Image as image
import PIL.ImageDraw as imagedraw
import matplotlib.pyplot as plt
train_batch_size = 10
test_batch_size = 2


class Net1:
    """CNN predicting one bounding box and a confidence score per image.

    ``self.x`` takes 224x224 RGB batches. ``self.y`` has five columns: four
    box coordinates followed by a confidence label. Call ``forward()`` and
    then ``backward()`` once to build the graph.
    """

    def __init__(self):
        self.x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
        self.y = tf.placeholder(dtype=tf.float32, shape=[None, 5])
        # Batch-normalization mode. Feed True on training steps so the layers
        # use batch statistics; the default (False) uses the moving averages.
        self.is_training = tf.placeholder_with_default(False, shape=[])

        # Convolution kernels are [height, width, in_channels, out_channels].
        self.conv_w1 = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 64], stddev=tf.sqrt(1 / 64), dtype=tf.float32))
        self.conv_b1 = tf.Variable(tf.zeros(shape=[64], dtype=tf.float32))
        self.conv_w2 = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 128], stddev=tf.sqrt(1 / 128), dtype=tf.float32))
        self.conv_b2 = tf.Variable(tf.zeros(shape=[128], dtype=tf.float32))
        self.conv_w3 = tf.Variable(tf.truncated_normal(shape=[3, 3, 128, 256], stddev=tf.sqrt(1 / 256), dtype=tf.float32))
        self.conv_b3 = tf.Variable(tf.zeros(shape=[256], dtype=tf.float32))
        self.conv_w4 = tf.Variable(tf.truncated_normal(shape=[3, 3, 256, 512], stddev=tf.sqrt(1 / 512), dtype=tf.float32))
        self.conv_b4 = tf.Variable(tf.zeros(shape=[512], dtype=tf.float32))
        self.conv_w5 = tf.Variable(tf.truncated_normal(shape=[3, 3, 512, 512], stddev=tf.sqrt(1 / 512), dtype=tf.float32))
        self.conv_b5 = tf.Variable(tf.zeros(shape=[512], dtype=tf.float32))
        # conv_w6 / conv_b6 are not used by forward(); they are kept so the
        # set of variables in the graph (and in checkpoints) is unchanged.
        self.conv_w6 = tf.Variable(tf.truncated_normal(shape=[3, 3, 512, 512], stddev=tf.sqrt(1 / 512), dtype=tf.float32))
        self.conv_b6 = tf.Variable(tf.zeros(shape=[512], dtype=tf.float32))

        # Fully connected layers: 7*7*512 -> 512 -> 128 -> 64 -> 5.
        self.fcn_w1 = tf.Variable(tf.truncated_normal(shape=[7 * 7 * 512, 512], stddev=tf.sqrt(1 / 512), dtype=tf.float32))
        self.fcn_b1 = tf.Variable(tf.zeros(shape=[512], dtype=tf.float32))
        self.fcn_w2 = tf.Variable(tf.truncated_normal(shape=[512, 128], stddev=tf.sqrt(1 / 128), dtype=tf.float32))
        self.fcn_b2 = tf.Variable(tf.zeros(shape=[128], dtype=tf.float32))
        self.fcn_w3 = tf.Variable(tf.truncated_normal(shape=[128, 64], stddev=tf.sqrt(1 / 64), dtype=tf.float32))
        self.fcn_b3 = tf.Variable(tf.zeros(shape=[64], dtype=tf.float32))
        self.fcn_w4 = tf.Variable(tf.truncated_normal(shape=[64, 5], stddev=tf.sqrt(1 / 5), dtype=tf.float32))
        self.fcn_b4 = tf.Variable(tf.zeros(shape=[5], dtype=tf.float32))

    def _conv_bn_relu(self, inputs, weights, bias):
        """Stride-1 SAME convolution plus bias, then batch norm and ReLU."""
        conv = tf.nn.conv2d(input=inputs, filter=weights, strides=[1, 1, 1, 1], padding="SAME") + bias
        return tf.nn.relu(tf.layers.batch_normalization(conv, training=self.is_training))

    @staticmethod
    def _max_pool(inputs):
        """2x2 max-pool with stride 2, halving height and width."""
        # No ReLU afterwards: the pooled values are already non-negative.
        return tf.nn.max_pool(value=inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    def _fc_bn_relu(self, inputs, weights, bias):
        """Fully connected layer plus bias, then batch norm and ReLU."""
        dense = tf.matmul(inputs, weights) + bias
        return tf.nn.relu(tf.layers.batch_normalization(dense, training=self.is_training))

    def forward(self):
        """Build the prediction graph and split outputs and labels."""
        self.conv_y1 = self._conv_bn_relu(self.x, self.conv_w1, self.conv_b1)      # 224x224x64
        self.mp_y1 = self._max_pool(self.conv_y1)                                  # 112x112x64
        self.conv_y2 = self._conv_bn_relu(self.mp_y1, self.conv_w2, self.conv_b2)  # 112x112x128
        self.mp_y2 = self._max_pool(self.conv_y2)                                  # 56x56x128
        self.conv_y3 = self._conv_bn_relu(self.mp_y2, self.conv_w3, self.conv_b3)  # 56x56x256
        self.mp_y3 = self._max_pool(self.conv_y3)                                  # 28x28x256
        self.conv_y4 = self._conv_bn_relu(self.mp_y3, self.conv_w4, self.conv_b4)  # 28x28x512
        self.mp_y4 = self._max_pool(self.conv_y4)                                  # 14x14x512
        self.conv_y5 = self._conv_bn_relu(self.mp_y4, self.conv_w5, self.conv_b5)  # 14x14x512
        self.mp_y5 = self._max_pool(self.conv_y5)                                  # 7x7x512

        self.fcn_y = tf.reshape(self.mp_y5, [-1, 7 * 7 * 512])
        self.fcn_y1 = self._fc_bn_relu(self.fcn_y, self.fcn_w1, self.fcn_b1)   # 512
        self.fcn_y2 = self._fc_bn_relu(self.fcn_y1, self.fcn_w2, self.fcn_b2)  # 128
        self.fcn_y3 = self._fc_bn_relu(self.fcn_y2, self.fcn_w3, self.fcn_b3)  # 64
        self.fcn_y4 = tf.matmul(self.fcn_y3, self.fcn_w4) + self.fcn_b4        # [batch, 5]

        # Columns 0-3: box coordinates; column 4: confidence logit.
        self.out_y1 = self.fcn_y4[:, :4]
        self.out_y2l = self.fcn_y4[:, 4:]
        self.out_y2 = tf.nn.sigmoid(self.out_y2l)
        self.label_y1 = self.y[:, :4]
        self.label_y2 = self.y[:, 4:]

    def backward(self):
        """Build the losses and the Adam training op."""
        # Mean squared error on the box, sigmoid cross-entropy on confidence.
        self.y1_loss = tf.reduce_mean((self.label_y1 - self.out_y1) ** 2)
        self.y2_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label_y2, logits=self.out_y2l))
        self.all_loss = self.y1_loss + self.y2_loss
        # The batch-norm moving averages are refreshed by ops collected in
        # UPDATE_OPS; they only run if the train op depends on them.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer().minimize(loss=self.all_loss)


if __name__ == "__main__":
    import os  # local import: only needed to prepare the checkpoint directory

    net = Net1()
    net.forward()
    net.backward()
    net.init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    # Make sure the checkpoint's parent directory exists before saving.
    os.makedirs("./params", exist_ok=True)

    with tf.Session() as sess:
        # To resume from a checkpoint: saver.restore(sess, "./params/ckpt")
        sess.run(net.init)
        for i in range(5000):
            xs, ys = sample_train.get_batch(train_batch_size)
            train_coords_error, train_confidences_error, train_error, _ = sess.run(
                [net.y1_loss, net.y2_loss, net.all_loss, net.optimizer],
                feed_dict={net.x: xs, net.y: ys, net.is_training: True})

            if i % 2 == 0:
                # Evaluation leaves is_training at its default (False).
                xss, yss = sample_test.get_batch(test_batch_size)
                test_coords, test_confidences, test_coords_error, test_confidences_error, test_error = sess.run(
                    [net.out_y1, net.out_y2, net.y1_loss, net.y2_loss, net.all_loss],
                    feed_dict={net.x: xss, net.y: yss})
                print("i:", i)
                print("train_error:", train_error)
                print("test_error:", test_error)

                # Predicted box of the first test image, back in pixels.
                x1 = test_coords[0][0] * 224
                y1 = test_coords[0][1] * 224
                x2 = test_coords[0][2] * 224
                y2 = test_coords[0][3] * 224
                test_confidence = test_confidences[0][0]

                # Assuming the [-1, 1] scaling used by the training sampler
                # (TODO confirm for sample_test): map back to [0, 255]
                # before casting, otherwise negative values wrap around.
                imgs = np.clip(np.round((xss[0] / 2 + 0.5) * 255), 0, 255)
                img = image.fromarray(np.uint8(imgs))

                # Ground-truth box; np.float32 also accepts labels that
                # arrive as strings.
                x_1 = np.float32(yss[0][0]) * 224
                y_1 = np.float32(yss[0][1]) * 224
                x_2 = np.float32(yss[0][2]) * 224
                y_2 = np.float32(yss[0][3]) * 224
                print("label:", x_1, y_1, x_2, y_2)
                print("output:", x1, y1, x2, y2)
                print("test_confidences:", test_confidence)

                imgdraw = imagedraw.Draw(img)
                imgdraw.rectangle((x_1, y_1, x_2, y_2), outline="blue")  # label
                imgdraw.rectangle((x1, y1, x2, y2), outline="red")       # prediction
                plt.clf()  # drop the previous preview instead of stacking images
                plt.imshow(img)
                plt.pause(0.1)
                saver.save(sess, "./params/ckpt")