目标检测小实验：框小黄人

最新推荐文章于 2021-03-28 21:23:56 发布

Alphapeople

最新推荐文章于 2021-03-28 21:23:56 发布

阅读量660

点赞数

分类专栏：计算机视觉、深度学习、人工智能　文章标签：目标检测

本文链接：https://blog.csdn.net/weixin_38241876/article/details/90038484

版权

人工智能同时被 3 个专栏收录

130 篇文章 8 订阅

订阅专栏

深度学习

122 篇文章 4 订阅

订阅专栏

计算机视觉

107 篇文章 5 订阅

订阅专栏

创建数据集：

from PIL import Image
import random
import numpy as np
import os
def Paste(img1,img2):
    """Composite one foreground image onto one background and save both samples.

    Two PNG files are written into ``dataset/``. The label is encoded in the
    file name as ``<stem>.<x1>.<y1>.<x2>.<y2>.<flag>.png``, where ``<stem>``
    is ``img1`` up to its first dot:

    * ``<stem>.0.0.0.0.0.png`` -- the untouched background (flag 0);
    * ``<stem>.<x1>.<y1>.<x2>.<y2>.1.png`` -- the background with the
      foreground pasted into the 100x100 box ``(x1, y1, x2, y2)`` (flag 1).

    Args:
        img1: File name of the background image inside the ``datasets``
            directory.
        img2: File name of the foreground image inside the ``yellow``
            directory.

    Returns:
        None. Failures while building or saving a sample are reported on
        stdout and the pair is skipped; failures to open either file
        propagate to the caller.
    """
    base_path = os.path.join('/home/lhq/PycharmProjects/untitled/datasets', img1)
    region_path = os.path.join('/home/lhq/PycharmProjects/untitled/yellow', img2)
    stem = img1.split('.')[0]

    with Image.open(base_path) as base_img, Image.open(region_path) as tmp_img:
        # PIL reports size as (width, height). A NumPy view of the same image
        # has shape (height, width, channels), so unpacking that shape as
        # W, H, C swaps the axes and fails outright on single-channel images.
        W, H = base_img.size
        try:
            # Top-left corner of the paste box, drawn from the upper-left
            # quadrant of the background.
            # NOTE(review): the box is not clamped to the image, so on
            # backgrounds smaller than about 200 px per side the recorded box
            # can extend past the image edge.
            w = random.randint(1, W // 2)
            h = random.randint(1, H // 2)
            box = (w, h, w + 100, h + 100)

            # The pasted region has to match the box size exactly, so scale
            # the whole foreground to the box and then rotate it.
            region = tmp_img.resize((box[2] - box[0], box[3] - box[1]))
            region = region.rotate(random.randint(-45, 45))

            # Save the negative sample before the background is modified.
            base_img.save('dataset/{}.0.0.0.0.0.png'.format(stem))

            # Use the alpha band (last band of RGBA) as the paste mask so
            # transparent foreground pixels leave the background visible.
            region = region.convert('RGBA')
            alpha = region.split()[-1]
            base_img.paste(region, box, mask=alpha)
            base_img.save('dataset/{}.{}.{}.{}.{}.1.png'.format(stem, w, h, w + 100, h + 100))
        except (OSError, ValueError) as exc:
            # Best-effort generation: report the failing pair instead of
            # silently discarding every kind of error.
            print('Paste: skipped {} + {}: {}'.format(img1, img2, exc))
 
# Pair every background with every foreground and generate the samples.
bj = os.listdir('/home/lhq/PycharmProjects/untitled/datasets')
# The sort key parses the part of each file name before the first dot as an
# integer, so backgrounds are processed in numeric rather than lexical order.
bj.sort(key=lambda x:int(x.split('.')[0]))
# The foreground listing does not depend on the background, so read it once
# instead of once per background.
qj = os.listdir('/home/lhq/PycharmProjects/untitled/yellow')
for img1 in bj:
    for img2 in qj:
        Paste(img1,img2)

from PIL import Image
import random
import numpy as np
import os
def Paste(img1,img2):
    """Composite one foreground onto a 224x224 background and save both samples.

    The background is resized to 224x224 first. Two PNG files are written
    into ``dataset/``; the label is encoded in the file name as
    ``<stem>.<x1>.<y1>.<x2>.<y2>.<flag>.png``, where ``<stem>`` is ``img1``
    up to its first dot:

    * ``<stem>.0.0.0.0.0.png`` -- the resized background alone (flag 0);
    * ``<stem>.<x1>.<y1>.<x2>.<y2>.1.png`` -- the background with the
      foreground pasted into the box ``(x1, y1, x2, y2)`` (flag 1).

    Args:
        img1: File name of the background image inside ``datasets``.
        img2: File name of the foreground image inside ``yellow``.

    Returns:
        None. Failures while building or saving a sample are reported on
        stdout and the pair is skipped; failures to open or resize the
        background propagate to the caller.
    """
    stem = img1.split('.')[0]

    with Image.open(os.path.join('datasets', img1)) as source_img, \
            Image.open(os.path.join('yellow', img2)) as tmp_img:
        base_img = source_img.resize((224, 224))
        try:
            # Random foreground size, then a random top-left corner chosen so
            # that the whole box stays inside the 224x224 background.
            new_w = random.randint(50, 100)
            new_h = random.randint(50, 100)
            w = random.randint(0, 224 - new_w)
            h = random.randint(0, 224 - new_h)
            box = (w, h, w + new_w, h + new_h)

            # The pasted region has to match the box size exactly. resize()
            # returns a new image, so its result must be kept.
            region = tmp_img.resize((box[2] - box[0], box[3] - box[1]))
            region = region.rotate(random.randint(-45, 45))

            # Save the negative sample before the background is modified.
            base_img.save('dataset/{}.0.0.0.0.0.png'.format(stem))

            # Use the alpha band (last band of RGBA) as the paste mask so
            # transparent foreground pixels leave the background visible.
            region = region.convert('RGBA')
            alpha = region.split()[-1]
            base_img.paste(region, box, mask=alpha)
            base_img.save('dataset/{}.{}.{}.{}.{}.1.png'.format(stem, w, h, w + new_w, h + new_h))
        except (OSError, ValueError) as exc:
            # Best-effort generation: report the failing pair instead of
            # silently discarding every kind of error.
            print('Paste: skipped {} + {}: {}'.format(img1, img2, exc))

# Pair every background with every foreground and generate the samples.
bj = os.listdir('datasets')
# The sort key parses the part of each file name before the first dot as an
# integer, so backgrounds are processed in numeric rather than lexical order.
bj.sort(key=lambda x:int(x.split('.')[0]))
# The foreground listing does not depend on the background, so read it once
# instead of once per background.
qj = os.listdir('yellow')
for img1 in bj:
    for img2 in qj:
        Paste(img1,img2)

训练模型：

import cv2
import os
import random
import tensorflow as tf
import numpy as np

class Sample:
    """Random mini-batch loader for the images in the ``dataset`` directory.

    Labels are parsed from the file names, which are expected to look like
    ``<stem>.<x1>.<y1>.<x2>.<y2>.<flag>.png``.
    """

    def __init__(self):
        # Raw images and label rows of the most recent batch.
        self.x = []
        self.y = []

    def get_batch(self,n):
        """Draw ``n`` images at random (with replacement) from ``dataset``.

        Args:
            n: Number of samples to draw.

        Returns:
            A tuple ``(images, inputs, labels)``:

            * ``images`` -- list of the arrays returned by ``cv2.imread``;
            * ``inputs`` -- the same images stacked into one array and
              rescaled from [0, 255] to [-1, 1];
            * ``labels`` -- array with one ``[x1, y1, x2, y2, flag]`` row per
              image, the four coordinates divided by 224.

        Raises:
            ValueError: If ``n`` is positive and ``dataset`` is empty.
            IOError: If OpenCV cannot read a selected file.
        """
        imgs = os.listdir('dataset')
        if n > 0 and not imgs:
            raise ValueError("no images found in 'dataset'")

        # Start every batch from empty lists; otherwise samples from earlier
        # calls pile up and the returned batch is larger than ``n``.
        self.x = []
        self.y = []
        for _ in range(n):
            img = random.choice(imgs)
            img_path = os.path.join('dataset', img)
            pixels = cv2.imread(img_path)
            if pixels is None:
                # cv2.imread signals failure by returning None, not raising.
                raise IOError('cannot read image: {}'.format(img_path))
            self.x.append(pixels)

            fields = img.split('.')
            # Coordinates are stored in pixels; divide by 224 to normalise.
            # NOTE(review): this assumes the images are 224x224 -- confirm
            # against the script that generated the dataset.
            x1, y1, x2, y2 = [int(fields[i]) / 224 for i in range(1, 5)]
            label = int(fields[5])
            self.y.append([x1, y1, x2, y2, label])

        self.x1 = (np.array(self.x) / 255 - 0.5) * 2
        return self.x,self.x1,np.array(self.y)

class Net:
    """Small TF1 CNN that regresses one bounding box and one confidence logit.

    Five conv + max-pool stages feed two fully connected layers and two
    heads: ``out_1`` (4 box coordinates) and ``out_2`` (1 confidence logit).
    Call ``forward()`` to build the network and ``backward()`` to add the
    loss and optimizer.

    Variables are created in a fixed order; keep that order unchanged so
    existing checkpoints, which refer to the auto-generated variable names,
    stay loadable.
    """

    def __init__(self):
        # Inputs: images of 224x224x3, labels as [x1, y1, x2, y2, flag].
        self.x = tf.placeholder(shape=[None,224,224,3],dtype=tf.float32)
        self.y = tf.placeholder(shape=[None,5],dtype=tf.float32)

        # Convolution kernels use variance 2 / fan_in.
        self.conv1_w = self._weight([3, 3, 3, 64], 2 / (3*3*3))
        self.conv1_b = tf.Variable(tf.zeros([64]))
        self.conv2_w = self._weight([3, 3, 64, 128], 2 / (3*3*64))
        self.conv2_b = tf.Variable(tf.zeros([128]))
        self.conv3_w = self._weight([3, 3, 128, 256], 2 / (3*3*128))
        self.conv3_b = tf.Variable(tf.zeros([256]))
        self.conv4_w = self._weight([3, 3, 256, 256], 2 / (3*3*256))
        self.conv4_b = tf.Variable(tf.zeros([256]))
        self.conv5_w = self._weight([3, 3, 256, 512], 2 / (3*3*256))
        self.conv5_b = tf.Variable(tf.zeros([512]))

        # Fully connected weights use variance 1 / fan_in.
        self.w1 = self._weight([7*7*512, 512], 1 / (7*7*512))
        self.b1 = tf.Variable(tf.zeros([512]))
        self.w2 = self._weight([512, 256], 1 / 512)
        self.b2 = tf.Variable(tf.zeros([256]))
        # Box head (4 values) and confidence head (1 logit).
        self.w3_1 = self._weight([256, 4], 1 / 256)
        self.b3_1 = tf.Variable(tf.zeros([4]))
        self.w3_2 = self._weight([256, 1], 1 / 256)
        self.b3_2 = tf.Variable(tf.zeros([1]))

    @staticmethod
    def _weight(shape, variance):
        """Create a truncated-normal weight variable with the given variance."""
        return tf.Variable(tf.truncated_normal(shape=shape, dtype=tf.float32, stddev=tf.sqrt(variance)))

    @staticmethod
    def _conv_bn_relu(inputs, kernel, bias):
        """Apply a stride-1 SAME convolution, bias, batch norm and ReLU."""
        # NOTE(review): batch_normalization is called with its default
        # `training` argument; confirm that is the intended mode for training.
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME') + bias
        return tf.nn.relu(tf.layers.batch_normalization(conv))

    @staticmethod
    def _max_pool(inputs):
        """Halve the spatial size with a 2x2, stride-2 max pool."""
        # The input is already a ReLU output, so its maxima are non-negative
        # and a further ReLU on the pooled result would be a no-op.
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    @staticmethod
    def _dense_bn_relu(inputs, weights, bias):
        """Apply a fully connected layer, batch norm and ReLU."""
        return tf.nn.relu(tf.layers.batch_normalization(tf.matmul(inputs, weights) + bias))

    def forward(self):
        """Build the network; sets ``self.out_1`` (box) and ``self.out_2`` (logit)."""
        self.conv1 = self._conv_bn_relu(self.x, self.conv1_w, self.conv1_b)
        self.pool1 = self._max_pool(self.conv1)  # 112
        self.conv2 = self._conv_bn_relu(self.pool1, self.conv2_w, self.conv2_b)
        self.pool2 = self._max_pool(self.conv2)  # 56
        self.conv3 = self._conv_bn_relu(self.pool2, self.conv3_w, self.conv3_b)
        self.pool3 = self._max_pool(self.conv3)  # 28
        self.conv4 = self._conv_bn_relu(self.pool3, self.conv4_w, self.conv4_b)
        self.pool4 = self._max_pool(self.conv4)  # 14
        self.conv5 = self._conv_bn_relu(self.pool4, self.conv5_w, self.conv5_b)
        self.pool5 = self._max_pool(self.conv5)  # 7
        self.flat = tf.reshape(self.pool5,[-1,7*7*512])
        self.f1 = self._dense_bn_relu(self.flat, self.w1, self.b1)
        self.f2 = self._dense_bn_relu(self.f1, self.w2, self.b2)
        self.out_1 = tf.matmul(self.f2, self.w3_1) + self.b3_1
        self.out_2 = tf.matmul(self.f2, self.w3_2) + self.b3_2

    def backward(self):
        """Add the loss and Adam optimizer; requires ``forward()`` to have run."""
        # Bounding-box loss: mean squared error on the four coordinates.
        loss1 = tf.reduce_mean((self.out_1-self.y[:,:4])**2)
        labels = tf.reshape(self.y[:, 4],[-1,1])
        # Confidence loss. sigmoid_cross_entropy_with_logits applies the
        # sigmoid itself, so it must receive the raw logits; passing an
        # already-squashed value would apply the sigmoid twice.
        loss2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.out_2, labels=labels))
        self.loss = loss1+loss2
        self.optimizer = tf.train.AdamOptimizer().minimize(self.loss)

if __name__ == '__main__':
    # Train for 1000 steps on random batches and checkpoint every 500 steps.

    # A raw string keeps the checkpoint prefix byte-identical to the original
    # literal without relying on an invalid "\c" escape sequence.
    ckpt_prefix = r'params\chpk'
    # Create the checkpoint's parent directory up front so saving does not
    # depend on it already existing. Where the backslash is not a path
    # separator the prefix has no directory part and nothing is created.
    ckpt_dir = os.path.dirname(ckpt_prefix)
    if ckpt_dir and not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)

    net = Net()
    net.forward()
    net.backward()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(1000):
            # A fresh sampler per step keeps every batch at exactly 50 items.
            sample = Sample()
            _, x, y = sample.get_batch(50)
            # Only the loss is reported, so only it and the train op are run.
            loss, _ = sess.run([net.loss, net.optimizer], feed_dict={net.x: x, net.y: y})
            print(loss)
            if (epoch+1)%500 == 0:
                saver.save(sess, save_path=ckpt_prefix)

加载模型用于测试集：

if __name__ == '__main__':
    # Restore the trained network and show its predicted box (red) on every
    # image in ``dataset``, one window at a time.
    net = Net()
    net.forward()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Raw string: same value as the original literal, without relying on
        # an invalid "\c" escape sequence.
        saver.restore(sess, r'params\chpk')
        imgs = os.listdir('dataset')
        for img in imgs:
            # os.path.join uses the platform separator, so the file is found
            # in the same directory that os.listdir just enumerated.
            IMG = cv2.imread(os.path.join('dataset', img))
            if IMG is None:
                # cv2.imread returns None for files it cannot decode.
                print('skip unreadable file: {}'.format(img))
                continue
            Img = np.reshape(IMG,[-1,224,224,3])
            # Same [-1, 1] scaling that is applied to the training inputs.
            Img = (Img/255-0.5)*2
            Out = sess.run([net.out_1], feed_dict={net.x: Img})
            out = Out[0][0]
            # The network predicts coordinates as fractions of 224 pixels.
            position = [abs(int(n)) for n in list(out*224)]
            cv2.rectangle(IMG, (position[0], position[1]), (position[2], position[3]), (0, 0, 255), 2)
            cv2.imshow('box',IMG)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

if __name__ == '__main__':
    # Restore the trained network and, for 100 random test images, draw the
    # predicted box (red) next to the ground-truth box (blue) parsed from the
    # file name ``<stem>.<x1>.<y1>.<x2>.<y2>.<flag>.png``.
    net = Net()
    net.forward()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Raw string: same value as the original literal, without relying on
        # an invalid "\c" escape sequence.
        saver.restore(sess, r'params\chpk')
        imgs = os.listdir('test_img')
        if not imgs:
            raise ValueError("no images found in 'test_img'")
        for i in range(100):
            # random.randint includes its upper bound, so indexing with
            # randint(0, len(imgs)) can run one past the end; random.choice
            # always picks a valid element.
            img = random.choice(imgs)
            fields = img.split('.')
            x1 = int(fields[1])
            y1 = int(fields[2])
            x2 = int(fields[3])
            y2 = int(fields[4])
            # os.path.join uses the platform separator, so the file is found
            # in the same directory that os.listdir just enumerated.
            IMG = cv2.imread(os.path.join('test_img', img))
            if IMG is None:
                # cv2.imread returns None for files it cannot decode.
                print('skip unreadable file: {}'.format(img))
                continue
            Img = np.reshape(IMG, [-1, 224, 224, 3])
            # Same [-1, 1] scaling that is applied to the training inputs.
            Img = (Img / 255 - 0.5) * 2
            Out = sess.run([net.out_1], feed_dict={net.x: Img})
            out = Out[0][0]
            # The network predicts coordinates as fractions of 224 pixels.
            position = [abs(int(n)) for n in list(out * 224)]
            cv2.rectangle(IMG, (position[0], position[1]), (position[2], position[3]), (0, 0, 255), 2)
            cv2.rectangle(IMG, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.imshow(img, IMG)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

PyTorch 版本：

import os
import cv2
import numpy as np
import random
import torch
from torch import nn
from _02 import Sample
from torch.autograd import Variable
from torch import optim
import matplotlib.pyplot as plt

class Net(nn.Module):
    """Fully convolutional detector: one box and one confidence per image.

    Five identical stages (conv 3x3 -> batch norm -> ReLU -> 2x2 max pool ->
    ReLU) each halve the spatial size; a final 7x7, stride-7 convolution
    produces 5 channels that are read as 4 box coordinates plus 1
    confidence score. An earlier variant used three fully connected layers
    in place of the final convolution.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Spatial sizes below assume a 224x224 input.
        self.conv1 = self._stage(3, 32)      # -> 112
        self.conv2 = self._stage(32, 64)     # -> 56
        self.conv3 = self._stage(64, 128)    # -> 28
        self.conv4 = self._stage(128, 256)   # -> 14
        self.conv5 = self._stage(256, 512)   # -> 7
        # Replaces a fully connected head: 5 output channels per 7x7 window.
        self.conv6 = nn.Conv2d(512, 5, 7, stride=7, padding=0)
        self.confidence = nn.Sigmoid()

    @staticmethod
    def _stage(in_channels, out_channels):
        """Build one conv -> BN -> ReLU -> max-pool -> ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, padding=0),
            nn.ReLU(),
        )

    def forward(self, x):
        """Return ``(position, confidence)`` for a batch of images.

        ``position`` holds the first four output values per row and
        ``confidence`` is the sigmoid of the last one.
        """
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        # Flatten the 5-channel output into rows of 5 values.
        outputs = self.conv6(x).view(-1, 5)
        position = outputs[:, :4]
        confidence = self.confidence(outputs[:, -1])
        return position, confidence

if __name__ == '__main__':
    # Train the PyTorch detector on tiny random batches and, from the seventh
    # iteration on, preview the first prediction of each batch.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = Net().to(device)
    sample = Sample()
    optimizer = optim.Adam(net.parameters(),lr=0.0001)
    error1 = nn.MSELoss()  # box regression loss
    error2 = nn.BCELoss()  # confidence loss on sigmoid outputs
    for epoch in range(100):
        # NOTE(review): Sample comes from the external module `_02`; this
        # code expects get_batch to return (images, labels) -- confirm.
        datas,lables = sample.get_batch(3)
        # Reorder from (batch, height, width, channel) to the
        # (batch, channel, height, width) layout that Conv2d expects.
        datas = np.transpose(datas,[0,3,1,2])
        # Inputs and targets are plain data: they must not require gradients,
        # and the CPU and GPU paths should treat them identically.
        datas = torch.Tensor(datas).to(device)
        lables = torch.Tensor(lables).to(device)

        position,confidence = net(datas)
        loss1 = error1(position,lables[:,:4])
        loss2 = error2(confidence,lables[:,-1])
        loss = loss1 + loss2
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(loss1, loss2)

        if epoch > 5:
            # Boxes are fractions of 224 pixels; convert the first sample of
            # the batch back to pixel coordinates.
            x1, y1, x2, y2 = [int(abs(position[0, k].item()) * 224) for k in range(4)]
            x1_, y1_, x2_, y2_ = [int(abs(lables[0, k].item()) * 224) for k in range(4)]
            # NOTE(review): the preview always uses this fixed file, not the
            # image the batch was drawn from -- confirm this is intended.
            IMG = cv2.imread(r'0.25.132.113.215.1.png')
            if IMG is None:
                # cv2.imread returns None when the file cannot be read.
                print('preview image not found, skipping visualisation')
                continue
            cv2.rectangle(IMG, (x1, y1), (x2, y2), (255, 0, 0), 2)  # blue: prediction
            cv2.rectangle(IMG, (x1_, y1_), (x2_, y2_), (0, 0, 255), 2)  # red: ground truth
            cv2.imshow('img', IMG)
            cv2.waitKey(0)
        # A disabled variant of this loop saved net.state_dict() to
        # 'net_params.pkl' and exited once loss1 dropped below 0.2, gave up
        # after 5000 iterations, and printed the losses every 20 iterations.

Alphapeople

关注

0
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
目标检测小实验框小黄人

创建数据集：from PIL import Imageimport randomimport numpy as npimport osdef Paste(img1,img2): # 加载底图 base_img = Image.open(os.path.join('/home/lhq/PycharmProjects/untitled/datasets',img1)) ...
复制链接

扫一扫

专栏目录