创建数据集:
from PIL import Image
import random
import numpy as np
import os
def Paste(img1, img2):
    """Paste a rotated patch onto a background image and save both samples.

    Two PNG files are written into the relative ``dataset`` directory:

    * ``<stem>.0.0.0.0.0.png`` - the untouched background (negative sample);
    * ``<stem>.<x1>.<y1>.<x2>.<y2>.1.png`` - the background with the patch
      pasted into the box ``(x1, y1, x2, y2)`` (positive sample).

    ``<stem>`` is the part of ``img1`` before its first dot.

    Args:
        img1: File name of the background image inside the ``datasets`` folder.
        img2: File name of the patch image inside the ``yellow`` folder.

    Failures while compositing or saving are reported on stdout and the
    sample is skipped, so one bad image does not abort a whole run.
    """
    patch_side = 100  # the patch is always scaled to a 100x100 square
    base_path = os.path.join('/home/lhq/PycharmProjects/untitled/datasets', img1)
    patch_path = os.path.join('/home/lhq/PycharmProjects/untitled/yellow', img2)
    stem = img1.split('.')[0]

    with Image.open(base_path) as base_img, Image.open(patch_path) as tmp_img:
        # PIL reports size as (width, height). The previous code unpacked the
        # NumPy shape (height, width, channels) as (W, H, C), which swapped
        # the two axes and failed for images without a channel axis.
        W, H = base_img.size
        try:
            # Top-left corner of the region that gets covered by the patch.
            w = random.randint(1, W // 2)
            h = random.randint(1, H // 2)
            box = (w, h, w + patch_side, h + patch_side)

            # Convert to RGBA *before* rotating: the corners uncovered by the
            # rotation are then transparent instead of opaque black, so the
            # alpha mask used below really hides them.
            region = tmp_img.convert('RGBA')
            # paste() needs the region size to match the box size exactly.
            region = region.resize((box[2] - box[0], box[3] - box[1]))
            region = region.rotate(random.randint(-45, 45))

            # Negative sample: background without any patch.
            base_img.save('dataset/{}.0.0.0.0.0.png'.format(stem))

            # Positive sample: alpha channel (last band) is the paste mask.
            alpha = region.split()[-1]
            base_img.paste(region, box, mask=alpha)
            base_img.save(
                'dataset/{}.{}.{}.{}.{}.1.png'.format(stem, w, h, box[2], box[3])
            )
        except (OSError, ValueError) as exc:
            # Best effort: report and skip instead of silently swallowing.
            print('Skipping {} + {}: {}'.format(img1, img2, exc))
# Driver: combine every background image with every patch image.
# Backgrounds are processed in numeric order of the file-name stem, so every
# file in the folder must start with an integer before its first dot.
bj = os.listdir('/home/lhq/PycharmProjects/untitled/datasets')
bj.sort(key=lambda x: int(x.split('.')[0]))
# The patch list does not change between backgrounds, so read it only once.
qj = os.listdir('/home/lhq/PycharmProjects/untitled/yellow')
for img1 in bj:
    for img2 in qj:
        Paste(img1, img2)
from PIL import Image
import random
import numpy as np
import os
def Paste(img1, img2):
    """Paste a randomly sized, rotated patch onto a 224x224 background.

    The background is resized to 224x224 first. Two PNG files are written
    into the relative ``dataset`` directory:

    * ``<stem>.0.0.0.0.0.png`` - the resized background (negative sample);
    * ``<stem>.<x1>.<y1>.<x2>.<y2>.1.png`` - the background with the patch
      pasted into the box ``(x1, y1, x2, y2)`` (positive sample).

    ``<stem>`` is the part of ``img1`` before its first dot.

    Args:
        img1: File name of the background image inside ``datasets``.
        img2: File name of the patch image inside ``yellow``.

    Failures while compositing or saving are reported on stdout and the
    sample is skipped.
    """
    side = 224  # edge length of the square output image
    stem = img1.split('.')[0]

    with Image.open(os.path.join('datasets', img1)) as source, \
            Image.open(os.path.join('yellow', img2)) as tmp_img:
        base_img = source.resize((side, side))
        try:
            # Random patch size, then a random position that keeps the whole
            # patch inside the 224x224 canvas.
            new_w = random.randint(50, 100)
            new_h = random.randint(50, 100)
            w = random.randint(0, side - new_w)
            h = random.randint(0, side - new_h)
            box = (w, h, w + new_w, h + new_h)

            # Convert to RGBA *before* rotating so the corners uncovered by
            # the rotation are transparent rather than opaque black.
            region = tmp_img.convert('RGBA')
            # paste() needs the region size to match the box size exactly.
            # (An earlier extra resize whose result was discarded is removed.)
            region = region.resize((new_w, new_h))
            region = region.rotate(random.randint(-45, 45))

            # Negative sample: background without any patch.
            base_img.save('dataset/{}.0.0.0.0.0.png'.format(stem))

            # Positive sample: alpha channel (last band) is the paste mask.
            alpha = region.split()[-1]
            base_img.paste(region, box, mask=alpha)
            base_img.save(
                'dataset/{}.{}.{}.{}.{}.1.png'.format(stem, w, h, box[2], box[3])
            )
        except (OSError, ValueError) as exc:
            # Best effort: report and skip instead of silently swallowing.
            print('Skipping {} + {}: {}'.format(img1, img2, exc))
# Driver: combine every background image with every patch image.
# Backgrounds are processed in numeric order of the file-name stem, so every
# file in ``datasets`` must start with an integer before its first dot.
bj = os.listdir('datasets')
bj.sort(key=lambda x: int(x.split('.')[0]))
# The patch list does not change between backgrounds, so read it only once.
qj = os.listdir('yellow')
for img1 in bj:
    for img2 in qj:
        Paste(img1, img2)
训练模型:
import cv2
import os
import random
import tensorflow as tf
import numpy as np
class Sample:
    """Random mini-batch loader for the images in the ``dataset`` directory.

    File names are parsed as ``<id>.<x1>.<y1>.<x2>.<y2>.<label>.<ext>``:
    fields 1-4 are box coordinates in pixels and field 5 is the class label.
    """

    def __init__(self):
        self.x = []  # raw images of the most recent batch
        self.y = []  # [x1, y1, x2, y2, label] rows of the most recent batch

    def get_batch(self, n):
        """Draw ``n`` images (with replacement) and build one training batch.

        Args:
            n: Number of samples in the batch.

        Returns:
            A tuple ``(images, inputs, targets)`` where ``images`` is the list
            of arrays returned by ``cv2.imread``, ``inputs`` is the same data
            as one array rescaled from [0, 255] to [-1, 1], and ``targets`` is
            an array with one ``[x1, y1, x2, y2, label]`` row per sample, the
            coordinates divided by 224.

        Raises:
            ValueError: If ``dataset`` contains no files.
            OSError: If a selected file cannot be read as an image.
        """
        imgs = os.listdir('dataset')
        if not imgs:
            raise ValueError("no images found in 'dataset'")

        # Start from a clean slate so that calling get_batch() repeatedly on
        # the same instance returns n samples each time instead of an
        # ever-growing accumulation of all earlier batches.
        self.x = []
        self.y = []

        for _ in range(n):
            img = random.choice(imgs)
            # os.path.join instead of a hard-coded backslash: matches the
            # directory listed above on every platform.
            image = cv2.imread(os.path.join('dataset', img))
            if image is None:
                # cv2.imread signals failure by returning None.
                raise OSError('cannot read image: {}'.format(img))
            self.x.append(image)

            position = img.split('.')
            # Coordinates are normalised by the 224-pixel image edge.
            x1 = int(position[1]) / 224
            y1 = int(position[2]) / 224
            x2 = int(position[3]) / 224
            y2 = int(position[4]) / 224
            label = int(position[5])
            self.y.append([x1, y1, x2, y2, label])

        self.x1 = np.array(self.x)
        self.x1 = (self.x1 / 255 - 0.5) * 2
        return self.x, self.x1, np.array(self.y)
class Net:
    """TF1 graph: five conv/pool stages plus two dense layers and two heads.

    Input is a batch of 224x224x3 images (``self.x``); the target
    (``self.y``) has five columns: four box coordinates and one label.
    ``out_1`` is the 4-value box regression head, ``out_2`` the 1-value
    confidence *logit* head.

    Variables are created in a fixed order in ``__init__``; keep that order
    unchanged, because the auto-generated variable names (and therefore
    saved checkpoints) depend on it.
    """

    @staticmethod
    def _weight(shape, gain):
        """Return a truncated-normal variable with stddev sqrt(gain / fan_in).

        ``fan_in`` is the product of all dimensions except the last one.
        """
        fan_in = 1
        for dim in shape[:-1]:
            fan_in *= dim
        return tf.Variable(
            tf.truncated_normal(shape=shape, dtype=tf.float32,
                                stddev=tf.sqrt(gain / fan_in))
        )

    def __init__(self):
        self.x = tf.placeholder(shape=[None, 224, 224, 3], dtype=tf.float32)
        self.y = tf.placeholder(shape=[None, 5], dtype=tf.float32)

        # Convolution kernels use gain 2, dense layers use gain 1.
        self.conv1_w = self._weight([3, 3, 3, 64], 2)
        self.conv1_b = tf.Variable(tf.zeros([64]))
        self.conv2_w = self._weight([3, 3, 64, 128], 2)
        self.conv2_b = tf.Variable(tf.zeros([128]))
        self.conv3_w = self._weight([3, 3, 128, 256], 2)
        self.conv3_b = tf.Variable(tf.zeros([256]))
        self.conv4_w = self._weight([3, 3, 256, 256], 2)
        self.conv4_b = tf.Variable(tf.zeros([256]))
        self.conv5_w = self._weight([3, 3, 256, 512], 2)
        self.conv5_b = tf.Variable(tf.zeros([512]))

        self.w1 = self._weight([7 * 7 * 512, 512], 1)
        self.b1 = tf.Variable(tf.zeros([512]))
        self.w2 = self._weight([512, 256], 1)
        self.b2 = tf.Variable(tf.zeros([256]))
        # Two separate heads on top of the shared 256-d feature.
        self.w3_1 = self._weight([256, 4], 1)
        self.b3_1 = tf.Variable(tf.zeros([4]))
        self.w3_2 = self._weight([256, 1], 1)
        self.b3_2 = tf.Variable(tf.zeros([1]))

    @staticmethod
    def _conv_stage(inputs, weights, bias):
        """Apply conv(SAME) + bias -> batch norm -> ReLU -> 2x2 max pool -> ReLU.

        Returns ``(activation, pooled)``; pooling halves height and width.
        """
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME') + bias
        # NOTE(review): batch_normalization is called without `training`, so
        # it uses its default; confirm this is intended for the training run.
        activation = tf.nn.relu(tf.layers.batch_normalization(conv))
        pooled = tf.nn.relu(
            tf.nn.max_pool(activation, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        )
        return activation, pooled

    def forward(self):
        """Build the inference graph and expose ``out_1`` and ``out_2``."""
        self.conv1, self.pool1 = self._conv_stage(self.x, self.conv1_w, self.conv1_b)      # 112
        self.conv2, self.pool2 = self._conv_stage(self.pool1, self.conv2_w, self.conv2_b)  # 56
        self.conv3, self.pool3 = self._conv_stage(self.pool2, self.conv3_w, self.conv3_b)  # 28
        self.conv4, self.pool4 = self._conv_stage(self.pool3, self.conv4_w, self.conv4_b)  # 14
        self.conv5, self.pool5 = self._conv_stage(self.pool4, self.conv5_w, self.conv5_b)  # 7

        self.flat = tf.reshape(self.pool5, [-1, 7 * 7 * 512])
        self.f1 = tf.nn.relu(tf.layers.batch_normalization(tf.matmul(self.flat, self.w1) + self.b1))
        self.f2 = tf.nn.relu(tf.layers.batch_normalization(tf.matmul(self.f1, self.w2) + self.b2))
        self.out_1 = tf.matmul(self.f2, self.w3_1) + self.b3_1  # box coordinates
        self.out_2 = tf.matmul(self.f2, self.w3_2) + self.b3_2  # confidence logit

    def backward(self):
        """Build the loss and the Adam training op (call after ``forward``)."""
        # Box loss: mean squared error on the four coordinates.
        loss1 = tf.reduce_mean((self.out_1 - self.y[:, :4]) ** 2)
        labels = tf.reshape(self.y[:, 4], [-1, 1])
        # Confidence loss. sigmoid_cross_entropy_with_logits applies the
        # sigmoid itself, so it must receive the raw logits; passing
        # sigmoid(out_2) squashed the value twice.
        loss2 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.out_2, labels=labels)
        )
        self.loss = loss1 + loss2
        self.optimizer = tf.train.AdamOptimizer().minimize(self.loss)
# Training entry point: 1000 optimisation steps on random batches of 50.
if __name__ == '__main__':
    net = Net()
    net.forward()
    net.backward()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(1000):
            # A fresh Sample per step keeps each batch at exactly 50 images.
            sample = Sample()
            _, x, y = sample.get_batch(50)
            # Only the loss is needed here; the prediction heads were fetched
            # before but never used.
            loss, _ = sess.run([net.loss, net.optimizer],
                               feed_dict={net.x: x, net.y: y})
            print(loss)
            if (epoch + 1) % 500 == 0:
                # Raw string: same path as before, without the invalid
                # '\c' escape sequence.
                saver.save(sess, save_path=r'params\chpk')
加载模型用于测试集:
# Inference over every image in ``dataset``: draw the predicted box in red.
if __name__ == '__main__':
    net = Net()
    net.forward()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Raw string: same path as before, without the invalid '\c' escape.
        saver.restore(sess, r'params\chpk')
        imgs = os.listdir('dataset')
        for img in imgs:
            # os.path.join matches the directory listed above on any platform.
            IMG = cv2.imread(os.path.join('dataset', img))
            if IMG is None:
                # cv2.imread returns None for files it cannot decode.
                print('Skipping unreadable image: {}'.format(img))
                continue
            # The network input is a batch of 224x224x3 images in [-1, 1].
            Img = np.reshape(IMG, [-1, 224, 224, 3])
            Img = (Img / 255 - 0.5) * 2
            Out = sess.run([net.out_1], feed_dict={net.x: Img})
            out = Out[0][0]
            # Predictions are normalised by 224; convert back to pixels.
            position = [abs(int(n)) for n in list(out * 224)]
            cv2.rectangle(IMG, (position[0], position[1]), (position[2], position[3]), (0, 0, 255), 2)
            cv2.imshow('box', IMG)
            cv2.waitKey(0)
        cv2.destroyAllWindows()
# Inference on 100 random images from ``test_img``: predicted box in red,
# ground-truth box (parsed from the file name) in blue.
if __name__ == '__main__':
    net = Net()
    net.forward()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Raw string: same path as before, without the invalid '\c' escape.
        saver.restore(sess, r'params\chpk')
        imgs = os.listdir('test_img')
        if not imgs:
            raise SystemExit("no images found in 'test_img'")
        for i in range(100):
            # random.choice avoids the previous off-by-one:
            # randint(0, len(imgs)) could return len(imgs) -> IndexError.
            img = random.choice(imgs)
            # File name layout: <id>.<x1>.<y1>.<x2>.<y2>.<label>.<ext>
            fields = img.split('.')
            x1 = int(fields[1])
            y1 = int(fields[2])
            x2 = int(fields[3])
            y2 = int(fields[4])
            IMG = cv2.imread(os.path.join('test_img', img))
            if IMG is None:
                # cv2.imread returns None for files it cannot decode.
                print('Skipping unreadable image: {}'.format(img))
                continue
            # The network input is a batch of 224x224x3 images in [-1, 1].
            Img = np.reshape(IMG, [-1, 224, 224, 3])
            Img = (Img / 255 - 0.5) * 2
            Out = sess.run([net.out_1], feed_dict={net.x: Img})
            out = Out[0][0]
            # Predictions are normalised by 224; convert back to pixels.
            position = [abs(int(n)) for n in list(out * 224)]
            cv2.rectangle(IMG, (position[0], position[1]), (position[2], position[3]), (0, 0, 255), 2)
            cv2.rectangle(IMG, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.imshow(img, IMG)
            cv2.waitKey(0)
            # Each image opens its own window (named after the file), so
            # close it before moving on.
            cv2.destroyAllWindows()
pytorch:
import os
import cv2
import numpy as np
import random
import torch
from torch import nn
from _02 import Sample
from torch.autograd import Variable
from torch import optim
import matplotlib.pyplot as plt
class Net(nn.Module):
    """Fully convolutional box/confidence regressor for 224x224 RGB input.

    Five identical stages (3x3 conv -> batch norm -> ReLU -> 2x2 max pool ->
    ReLU) halve the spatial size each time (224 -> 112 -> 56 -> 28 -> 14 -> 7)
    while widening the channels 3 -> 32 -> 64 -> 128 -> 256 -> 512. A final
    7x7 convolution with stride 7 produces 5 values per image: four box
    coordinates and one confidence score.
    """

    @staticmethod
    def _stage(in_channels, out_channels):
        """Build one conv/BN/ReLU/pool/ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, padding=0),
            nn.ReLU(),
        )

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = self._stage(3, 32)     # -> 112
        self.conv2 = self._stage(32, 64)    # -> 56
        self.conv3 = self._stage(64, 128)   # -> 28
        self.conv4 = self._stage(128, 256)  # -> 14
        self.conv5 = self._stage(256, 512)  # -> 7
        # Convolutional head; a Linear(7*7*512 -> 512 -> 256 -> 5) head is the
        # disabled alternative to this layer.
        self.conv6 = nn.Conv2d(512, 5, 7, stride=7, padding=0)
        self.confidence = nn.Sigmoid()

    def forward(self, x):
        """Return ``(position, confidence)`` for a batch of images.

        ``position`` holds the first four output values per sample and
        ``confidence`` the sigmoid of the fifth.
        """
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        flat = self.conv6(x).view(-1, 5)
        position = flat[:, :4]
        confidence = self.confidence(flat[:, -1])
        return position, confidence
# PyTorch training entry point: 100 steps on random batches of 3 images.
if __name__ == '__main__':
    net = Net()
    sample = Sample()
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        net = net.cuda()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    error1 = nn.MSELoss()  # box regression loss
    error2 = nn.BCELoss()  # confidence loss (the net already applies sigmoid)

    for epoch in range(100):
        datas, lables = sample.get_batch(3)
        # NHWC -> NCHW, the layout nn.Conv2d expects.
        datas = np.transpose(datas, [0, 3, 1, 2])
        datas = torch.Tensor(datas)
        lables = torch.Tensor(lables)
        # Inputs and targets are plain tensors on both devices. The previous
        # CPU-only Variable(..., requires_grad=True) wrapper tracked gradients
        # for data that is never optimised and differed from the CUDA path.
        if use_cuda:
            datas = datas.cuda()
            lables = lables.cuda()
        torch.cuda.empty_cache()

        position, confidence = net(datas)
        loss1 = error1(position, lables[:, :4])
        loss2 = error2(confidence, lables[:, -1])
        loss = loss1 + loss2
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(loss1, loss2)

        if epoch > 5:
            # Boxes are normalised by 224; convert sample 0 back to pixels.
            x1, y1, x2, y2 = [int(abs(position[0, k].item()) * 224) for k in range(4)]
            x1_, y1_, x2_, y2_ = [int(abs(lables[0, k].item()) * 224) for k in range(4)]
            # NOTE(review): the boxes of batch sample 0 are drawn on this one
            # fixed image, not on the sampled image - confirm this is intended.
            IMG = cv2.imread(r'0.25.132.113.215.1.png')
            if IMG is None:
                # cv2.imread returns None when the file is missing/unreadable.
                print('Preview image not found, skipping visualisation')
                continue
            cv2.rectangle(IMG, (x1, y1), (x2, y2), (255, 0, 0), 2)      # blue: prediction
            cv2.rectangle(IMG, (x1_, y1_), (x2_, y2_), (0, 0, 255), 2)  # red: ground truth
            cv2.imshow('img', IMG)
            cv2.waitKey(0)
        # Disabled early-stop / checkpoint logic, kept for reference:
        # if loss1 < 0.2:
        #     torch.save(net.state_dict(),'net_params.pkl')
        #     print(epoch)
        #     exit(0)
        # elif epoch >= 5000 and loss1 >= 0.2:
        #     exit(0)
        # if epoch % 20 == 0:
        #     print(loss1,loss2)