训练代码：框出小黄人（目标检测）

最新推荐文章于 2022-05-15 23:51:16 发布

Alphapeople

最新推荐文章于 2022-05-15 23:51:16 发布

阅读量362

点赞数 1

分类专栏：计算机视觉、深度学习、人工智能　文章标签：目标检测

本文链接：https://blog.csdn.net/weixin_38241876/article/details/90039962

版权

人工智能同时被 3 个专栏收录

130 篇文章 8 订阅

订阅专栏

深度学习

122 篇文章 4 订阅

订阅专栏

计算机视觉

107 篇文章 5 订阅

订阅专栏

生成背景图片（负样本）：

import os
import PIL.Image as image
# Generate negative (background-only) samples.
#
# Every image found in bg_dir is converted to RGB, resized to 224x224 and saved
# as "<index>.0.0.0.0.0.png": the four box coordinates and the trailing
# object flag are all zero because nothing is pasted onto the background.
x = 1

bg_dir = r"datasets"
train_dir = r"train_img"
test_dir = r"test_img"

# Image.save() does not create missing directories, so make sure the target exists.
os.makedirs(train_dir, exist_ok=True)

for filename in os.listdir(bg_dir):
    # The context manager releases the source file handle as soon as the pixel
    # data has been copied by convert().
    with image.open(os.path.join(bg_dir, filename)) as empty_image:
        resize_image = empty_image.convert("RGB").resize((224, 224))

    # os.path.join replaces the hard-coded "\" separator, which only worked on
    # Windows and is an invalid escape sequence inside a normal string literal.
    resize_image.save(os.path.join(train_dir, "{0}.0.0.0.0.0.png".format(x)))
    # To build the test set instead, save into test_dir.

    x = x + 1
    # The counter starts at 1, so this stops after 499 images have been written.
    if x == 500:
        break

生成训练样本（正样本）：

import os
import numpy as np
import matplotlib.image as image
import PIL.Image as pimage
import PIL.ImageDraw as imagedraw


# Generate positive samples.
#
# A randomly chosen, randomly rotated and randomly sized sprite from yellow_dir
# is pasted at a random position onto each 224x224 background. The bounding box
# is encoded in the output file name as "<index>.<x1>.<y1>.<x2>.<y2>.1.png" and
# the same label (without the extension) is appended to txt_dir.
bg_dir = r"datasets"
yellow_dir = r"yellow"
train_dir = r"train_img"
test_dir = r"test_img"
txt_dir = r"./x.txt"

# Image.save() does not create missing directories, so make sure the target exists.
os.makedirs(train_dir, exist_ok=True)

x = 1
with open(txt_dir, "w") as f:

    for filename in os.listdir(bg_dir):
        # os.listdir() yields complete file names, extension included.
        with pimage.open(os.path.join(bg_dir, filename)) as background:
            background_resize = background.convert("RGB").resize((224, 224))

        # Sprites are addressed by number: yellow/1.png ... yellow/20.png
        # (the upper bound of randint is exclusive).
        name = np.random.randint(1, 21)
        with pimage.open(os.path.join(yellow_dir, "{0}.png".format(name))) as yellow_img:
            # Convert to RGBA *before* rotating: the corners uncovered by the
            # rotation are then transparent. Rotating first left opaque black
            # corners whenever the sprite file had no alpha channel.
            rot_img = yellow_img.convert("RGBA").rotate(np.random.randint(-45, 45))

        new_w = np.random.randint(50, 100)
        new_h = np.random.randint(50, 100)
        resize_img = rot_img.resize((new_w, new_h))

        # Pick the top-left corner so that the sprite lies fully inside the image.
        paste_x1 = np.random.randint(0, 224 - new_w)
        paste_y1 = np.random.randint(0, 224 - new_h)

        # The sprite's own alpha band is the paste mask, so transparent pixels
        # leave the background untouched.
        alpha = resize_img.split()[3]
        background_resize.paste(resize_img, (paste_x1, paste_y1), mask=alpha)

        paste_x2 = paste_x1 + new_w
        paste_y2 = paste_y1 + new_h

        # Build the label once and reuse it for the text file and the file name.
        label = "{0}.{1}.{2}.{3}.{4}.1".format(x, paste_x1, paste_y1, paste_x2, paste_y2)
        f.write(label + "\n")

        # os.path.join replaces the hard-coded "\" separator, which only worked
        # on Windows and is an invalid escape sequence in a normal string literal.
        background_resize.save(os.path.join(train_dir, label + ".png"))
        # To build the test set instead, save into test_dir.

        x = x + 1
        # The counter starts at 1, so this stops after 499 images have been written.
        if x == 500:
            break

采样：

import os
import numpy as np
import tensorflow as tf
import time
train_dir = r"train_img"
import tensorflow.contrib.eager as tfe
class Sample:
    """Random mini-batch reader for the images stored in ``train_dir``.

    The label of an image is parsed from its file name,
    ``<index>.<x1>.<y1>.<x2>.<y2>.<confidence>.png``; the four coordinates are
    divided by 224 when the directory is scanned.
    """

    def __init__(self):
        self.filenames = []
        self.labels = []
        self.confidences = []
        # One "next batch" tensor per requested batch size, plus a single
        # long-lived session. Rebuilding the pipeline and opening a new session
        # on every call added nodes to the default graph each time and restarted
        # the iterator, so batches were always drawn from the first files only.
        self._batches = {}
        self._session = None

    def read_file(self):
        """Scan ``train_dir`` and rebuild ``self.filenames`` and ``self.labels``.

        Every label is ``[x1, y1, x2, y2, confidence]``; all five entries are
        kept as strings, with the coordinates already divided by 224.

        Raises:
            ValueError: a coordinate field of a file name is not an integer.
            IndexError: a file name has fewer dot-separated fields than expected.
        """
        self.filenames = []
        self.labels = []
        self.confidences = []
        for filename in os.listdir(train_dir):
            data = filename.split(".")
            coords = [str(int(value) / 224) for value in data[1:5]]
            confidence = data[5]

            # os.path.join replaces the hard-coded "\" separator, which only
            # worked on Windows.
            self.filenames.append(os.path.join(train_dir, filename))
            self.labels.append(coords + [confidence])

    def _parse_function(self, filename, label):
        """Load and decode the image stored at ``filename``; pass ``label`` through."""
        image_string = tf.read_file(filename)  # raw bytes of the image file
        image_decode = tf.image.decode_image(image_string)  # decoded pixel tensor
        return image_decode, label

    def get_batch(self, set):
        """Return one random batch ``(images, labels)`` of ``set`` samples.

        Args:
            set: batch size. The name shadows the builtin but is kept so that
                existing keyword callers continue to work.

        Returns:
            ``images`` rescaled from ``[0, 255]`` to ``[-1, 1]`` and the matching
            ``labels`` as produced by :meth:`read_file`.

        Raises:
            ValueError: ``train_dir`` contains no files.

        Note:
            ``train_dir`` is scanned the first time a given batch size is
            requested; files added afterwards are not picked up by that pipeline.
        """
        if set not in self._batches:
            self.read_file()
            if not self.filenames:
                raise ValueError("no images found in {0!r}".format(train_dir))
            data_set = tf.data.Dataset.from_tensor_slices((self.filenames, self.labels))
            # Shuffle the (cheap) file names over the whole data set *before*
            # decoding. The previous shuffle(set) after repeat()/map() could only
            # mix the first ~2*set files of the directory listing.
            data_set = data_set.shuffle(len(self.filenames))
            data_set = data_set.repeat()  # cycle through the data indefinitely
            data_set = data_set.map(self._parse_function)  # file name -> pixels
            data_set = data_set.batch(set)
            self._batches[set] = data_set.make_one_shot_iterator().get_next()

        if self._session is None:
            self._session = tf.Session()

        image, label = self._session.run(self._batches[set])
        img = (image / 255 - 0.5) * 2
        return img, label

# Shared sampler instance; other modules import this name.
sample_train = Sample()

# Timing smoke test. It is guarded so that merely importing sample_train from
# this module does not fetch 100 batches as a side effect.
if __name__ == "__main__":
    for i in range(100):
        started = time.time()
        print(np.shape(sample_train.get_batch(100)[0]))
        finished = time.time()
        print(finished - started)

搭建网络并进行训练：

import numpy as np
import tensorflow as tf
from random_sample_train import sample_train
from random_sample_test import sample_test
import PIL.Image as image
import PIL.ImageDraw as imagedraw
import matplotlib.pyplot as plt


train_batch_size = 10
test_batch_size = 2


class Net1:
    """CNN that predicts one bounding box and an object-confidence logit.

    Input is a batch of 224x224x3 images (``self.x``); the target ``self.y`` has
    five columns: four box coordinates followed by the confidence.
    Feed ``self.is_training: True`` on optimisation steps so that batch
    normalisation uses batch statistics and updates its moving averages; when
    it is not fed the layers run in inference mode.
    """

    def __init__(self):

        self.x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
        self.y = tf.placeholder(dtype=tf.float32, shape=[None, 5])
        # Batch-norm mode switch; defaults to inference mode when not fed.
        self.is_training = tf.placeholder_with_default(False, shape=[])

        # Variables are created in the same order as before so that their
        # auto-generated names (and therefore existing checkpoints) still match.
        self.conv_w1 = self._weight([3, 3, 3, 64])
        self.conv_b1 = self._bias(64)  # applied at 224*224

        self.conv_w2 = self._weight([3, 3, 64, 128])
        self.conv_b2 = self._bias(128)  # applied at 112*112

        self.conv_w3 = self._weight([3, 3, 128, 256])
        self.conv_b3 = self._bias(256)  # applied at 56*56

        self.conv_w4 = self._weight([3, 3, 256, 512])
        self.conv_b4 = self._bias(512)  # applied at 28*28

        self.conv_w5 = self._weight([3, 3, 512, 512])
        self.conv_b5 = self._bias(512)  # applied at 14*14

        # Not used by forward(); kept so the set of variables stays unchanged.
        self.conv_w6 = self._weight([3, 3, 512, 512])
        self.conv_b6 = self._bias(512)

        self.fcn_w1 = self._weight([7 * 7 * 512, 512])
        self.fcn_b1 = self._bias(512)

        self.fcn_w2 = self._weight([512, 128])
        self.fcn_b2 = self._bias(128)

        self.fcn_w3 = self._weight([128, 64])
        self.fcn_b3 = self._bias(64)

        self.fcn_w4 = self._weight([64, 5])
        self.fcn_b4 = self._bias(5)

    @staticmethod
    def _weight(shape):
        """Create a truncated-normal weight with stddev sqrt(1 / output size)."""
        return tf.Variable(tf.truncated_normal(shape=shape, stddev=tf.sqrt(1 / shape[-1]), dtype=tf.float32))

    @staticmethod
    def _bias(size):
        """Create a zero-initialised bias vector of length ``size``."""
        return tf.Variable(tf.zeros(shape=[size], dtype=tf.float32))

    def _conv_block(self, inputs, weights, bias):
        """Return (relu(bn(conv + bias)), 2x2 max-pool of that activation)."""
        conv = tf.nn.conv2d(input=inputs, filter=weights, strides=[1, 1, 1, 1], padding="SAME") + bias
        activated = tf.nn.relu(tf.layers.batch_normalization(conv, training=self.is_training))
        # No second ReLU here: the maximum of non-negative values is already
        # non-negative, so the extra activation was a no-op.
        pooled = tf.nn.max_pool(value=activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
        return activated, pooled

    def _dense_block(self, inputs, weights, bias):
        """Return relu(bn(inputs @ weights + bias))."""
        dense = tf.matmul(inputs, weights) + bias
        return tf.nn.relu(tf.layers.batch_normalization(dense, training=self.is_training))

    def forward(self):
        """Build the inference graph and split predictions and labels into box / confidence."""
        self.conv_y1, self.mp_y1 = self._conv_block(self.x, self.conv_w1, self.conv_b1)  # 224 -> 112
        self.conv_y2, self.mp_y2 = self._conv_block(self.mp_y1, self.conv_w2, self.conv_b2)  # 112 -> 56
        self.conv_y3, self.mp_y3 = self._conv_block(self.mp_y2, self.conv_w3, self.conv_b3)  # 56 -> 28
        self.conv_y4, self.mp_y4 = self._conv_block(self.mp_y3, self.conv_w4, self.conv_b4)  # 28 -> 14
        self.conv_y5, self.mp_y5 = self._conv_block(self.mp_y4, self.conv_w5, self.conv_b5)  # 14 -> 7

        self.fcn_y = tf.reshape(self.mp_y5, [-1, 7 * 7 * 512])

        self.fcn_y1 = self._dense_block(self.fcn_y, self.fcn_w1, self.fcn_b1)  # 512 units
        self.fcn_y2 = self._dense_block(self.fcn_y1, self.fcn_w2, self.fcn_b2)  # 128 units
        self.fcn_y3 = self._dense_block(self.fcn_y2, self.fcn_w3, self.fcn_b3)  # 64 units

        self.fcn_y4 = tf.matmul(self.fcn_y3, self.fcn_w4) + self.fcn_b4  # [batch, 5]

        self.out_y1 = self.fcn_y4[:, :4]  # box coordinates
        self.out_y2l = self.fcn_y4[:, 4:]  # confidence logit
        self.out_y2 = tf.nn.sigmoid(self.out_y2l)

        self.label_y1 = self.y[:, :4]
        self.label_y2 = self.y[:, 4:]

    def backward(self):
        """Build the losses and the Adam training op; call after forward()."""
        self.y1_loss = tf.reduce_mean((self.label_y1 - self.out_y1) ** 2)
        self.y2_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label_y2, logits=self.out_y2l))

        self.all_loss = self.y1_loss + self.y2_loss

        # The batch-norm moving averages are only updated when the ops in
        # UPDATE_OPS run, so tie them to every optimisation step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer().minimize(loss=self.all_loss)


if __name__ == "__main__":
    import os  # only needed here, to create the checkpoint directory

    net = Net1()
    net.forward()
    net.backward()

    net.init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    # Saver.save() refuses to write when the parent directory is missing.
    os.makedirs("./params", exist_ok=True)

    with tf.Session() as sess:
        # saver.restore(sess,"./params/ckpt")
        sess.run(net.init)

        for i in range(5000):
            xs, ys = sample_train.get_batch(train_batch_size)
            train_coords_error, train_confidences_error, train_error, _ = sess.run(
                [net.y1_loss, net.y2_loss, net.all_loss, net.optimizer],
                feed_dict={net.x: xs, net.y: ys, net.is_training: True})

            if i % 2 == 0:
                xss, yss = sample_test.get_batch(test_batch_size)
                # is_training is left at its default here, so batch norm runs in
                # inference mode for the evaluation batch.
                test_coords, test_confidences, test_coords_error, test_confidences_error, test_error = sess.run(
                    [net.out_y1, net.out_y2, net.y1_loss, net.y2_loss, net.all_loss],
                    feed_dict={net.x: xss, net.y: yss})

                print("i:", i)
                print("train_error:", train_error)
                print("test_error:", test_error)

                # Network outputs and labels are fractions of the 224-pixel side.
                x1 = test_coords[0][0] * 224
                y1 = test_coords[0][1] * 224
                x2 = test_coords[0][2] * 224
                y2 = test_coords[0][3] * 224
                test_confidence = test_confidences[0][0]

                # Undo the sampler's (pixel / 255 - 0.5) * 2 scaling. The former
                # "* 255" produced values outside [0, 255] that wrapped around in
                # uint8. NOTE(review): assumes sample_test normalises its images
                # the same way as the Sample.get_batch shown with this code - confirm.
                imgs = np.clip(np.rint((xss[0] / 2 + 0.5) * 255), 0, 255)
                img = image.fromarray(np.uint8(imgs))

                x_1 = np.float32(yss[0][0]) * 224
                y_1 = np.float32(yss[0][1]) * 224
                x_2 = np.float32(yss[0][2]) * 224
                y_2 = np.float32(yss[0][3]) * 224

                print("label:", x_1, y_1, x_2, y_2)
                print("output:", x1, y1, x2, y2)
                print("test_confidences:", test_confidence)

                imgdraw = imagedraw.Draw(img)
                imgdraw.rectangle((x_1, y_1, x_2, y_2), outline="blue")
                # An untrained network can predict x2 < x1 or y2 < y1; order the
                # corners because recent Pillow versions reject inverted boxes.
                imgdraw.rectangle(
                    (float(min(x1, x2)), float(min(y1, y2)), float(max(x1, x2)), float(max(y1, y2))),
                    outline="red")
                # img.show()
                plt.clf()  # drop the previous preview instead of stacking images
                plt.imshow(img)
                plt.pause(0.1)
                saver.save(sess, "./params/ckpt")