FCN
import torch
import torch.nn as nn
from model.Vgg16 import VGG

class FCNS(nn.Module):
    def __init__(self, num_class, backbone='vgg'):
        super().__init__()
        if backbone == 'vgg':
            self.features = VGG()
        else:
            raise ValueError(f'unsupported backbone: {backbone}')
        # transposed-conv output size: (i-1)*s - 2*p + d*(k-1) + output_padding + 1
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.relu1 = nn.ReLU()
        # deconv2: back to 1/8 of the input size
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU()
        # deconv3: back to 1/4 (each deconv doubles the spatial size)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        # deconv4: back to 1/2
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()
        # deconv5: back to the full input size
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()
        self.classifier = nn.Conv2d(32, num_class, kernel_size=1)

    def forward(self, x):
        features = self.features(x)  # features[0..4]: feature maps at 1/2, 1/4, 1/8, 1/16, 1/32 scale
        # upsample step by step, fusing each result with the encoder feature map of matching resolution
        y = self.bn1(self.relu1(self.deconv1(features[4])) + features[3])
        y = self.bn2(self.relu2(self.deconv2(y)) + features[2])
        y = self.bn3(self.relu3(self.deconv3(y)) + features[1])
        y = self.bn4(self.relu4(self.deconv4(y)) + features[0])
        y = self.bn5(self.relu5(self.deconv5(y)))
        y = self.classifier(y)  # back at the input resolution; classify every pixel
        return y
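As a quick check of the size formula in the comment above: with kernel_size=3, stride=2, padding=1, output_padding=1 the output size is (i-1)*2 - 2 + (3-1) + 1 + 1 = 2i, so each deconv exactly doubles the spatial size. A minimal standalone sketch (the 8-channel, 16x16 input is arbitrary):

import torch
import torch.nn as nn

deconv = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2, padding=1, output_padding=1)
x = torch.randn(1, 8, 16, 16)  # arbitrary test input
print(deconv(x).shape)  # torch.Size([1, 8, 32, 32]): spatial size doubled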
Unet
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    def __init__(self, input_channels, output_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1),  # spatial size unchanged
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.double_conv(x)
class DownSample(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.downSample = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.downSample(x)
class UpSample(nn.Module):
    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        # if bilinear, use the normal convolutions (in DoubleConv) to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)  # bilinear interpolation, doubles the spatial size
        else:
            self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2)  # doubles the spatial size, channels unchanged
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW; pad x1 so it matches the skip connection x2 before concatenating
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)
class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = DownSample(64, 128)    # 1/2
        self.down2 = DownSample(128, 256)   # 1/4
        self.down3 = DownSample(256, 512)   # 1/8
        self.down4 = DownSample(512, 512)   # 1/16
        # why 1024 input channels: 512 from the upsampled decoder map concatenated with 512 from the encoder skip
        self.up1 = UpSample(1024, 256, bilinear)
        self.up2 = UpSample(512, 128, bilinear)
        self.up3 = UpSample(256, 64, bilinear)
        self.up4 = UpSample(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits
if __name__ == '__main__':
    batch_size, num_classes, h, w = 8, 5, 256, 256
    x = torch.randn(batch_size, 3, h, w)  # torch.autograd.Variable is deprecated; plain tensors work
    print(x.shape)
    net = UNet(n_channels=3, n_classes=num_classes, bilinear=True)
    t = net(x)
    print(t.shape)
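The F.pad call in UpSample.forward is what lets inputs whose sides are not multiples of 16 survive the four 2x down/up steps: after upsampling, x1 is padded back to the skip connection's size before the concat. A quick sketch, assuming the classes above are in scope (the 250x250 size is arbitrary):

x = torch.randn(1, 3, 250, 250)  # 250 is not divisible by 16
net = UNet(n_channels=3, n_classes=2, bilinear=True)
print(net(x).shape)  # torch.Size([1, 2, 250, 250]): padding re-aligns the odd-sized maps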
Encoder
import math
import torch
import torch.nn as nn

class MutiHeadSelfAttention(nn.Module):
    def __init__(self, embeding_size, q_size, k_size, v_size, num_head):
        """
        Standard multi-head attention.
        :param embeding_size: input embedding size
        :param q_size: per-head size of the query projection
        :param k_size: per-head size of the key projection
        :param v_size: per-head size of the value projection
        :param num_head: number of attention heads
        """
        super().__init__()
        self.emb_size = embeding_size
        self.num_head = num_head
        self.q_size = q_size
        self.k_size = k_size
        self.v_size = v_size
        self.q = nn.Linear(embeding_size, num_head * q_size)
        self.k = nn.Linear(embeding_size, num_head * k_size)
        self.v = nn.Linear(embeding_size, num_head * v_size)
    def forward(self, q, k, v):
        """
        Multi-head: (b, seq, embeding_size) -> (b, seq, num_head*q_size) -> (b, head, seq, q_size);
        the single-head input x is (b, seq, embeding_size).
        """
        q = self.q(q)  # (b, seq, num_head*q_size)
        k = self.k(k)  # (b, seq, num_head*k_size)
        # q is dotted with k, so q_size must equal k_size;
        # transpose k so the dot product can be written as a matmul
        q = q.view(q.size()[0], q.size()[1], self.num_head, self.q_size).transpose(1, 2)                  # (b, head, seq, q_size)
        k = k.view(k.size()[0], k.size()[1], self.num_head, self.k_size).transpose(1, 2).transpose(2, 3)  # (b, head, k_size, seq)
        # scaled dot-product attention scores
        attention = torch.matmul(q, k) / math.sqrt(self.q_size)  # (b, head, seq, seq)
        # softmax over the last dim so each row of scores sums to 1
        attention = torch.softmax(attention, dim=-1)
        v = self.v(v)  # (b, seq, num_head*v_size)
        v = v.view(v.size()[0], v.size()[1], self.num_head, self.v_size).transpose(1, 2)  # (b, head, seq, v_size)
        # weight the values by the attention scores
        z = torch.matmul(attention, v)  # (b, head, seq, v_size)
        z = z.transpose(1, 2)           # (b, seq, head, v_size)
        return z.reshape(z.size()[0], z.size()[1], -1)  # (b, seq, head*v_size)
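A minimal shape check for the attention module above (all sizes are made up). Note the output's last dimension is num_head*v_size, which only equals embeding_size when the sizes are chosen to match, as here:

x = torch.randn(2, 10, 64)  # (b, seq, embeding_size)
mhsa = MutiHeadSelfAttention(embeding_size=64, q_size=16, k_size=16, v_size=16, num_head=4)
print(mhsa(x, x, x).shape)  # torch.Size([2, 10, 64]): num_head*v_size = 4*16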
"""
由于transformer不能提取位置信息,人为手工加上对应的位置信息
"""
class EmbedingWithPosition(nn.Module):
def __init__(self,vocab_size,embeding_size,dropout = 0.1,seq_max_len=5000):
super().__init__()
self.embeding = nn.Embedding(vocab_size,embeding_size) #将每个元素用embeding_size个向量进行表示
# 为序列中的每个元素准备一个embeding_size长的位置编码信息
position_idx = torch.arange(0, seq_max_len, dtype=torch.float).unsqueeze(-1)
position_emb_fill = position_idx * torch.exp(-torch.arange(0, embeding_size, 2) * math.log(10000.0) / embeding_size)
pos_encoding = torch.zeros(seq_max_len, embeding_size)
pos_encoding[:, 0::2] = torch.sin(position_emb_fill)
pos_encoding[:, 1::2] = torch.cos(position_emb_fill)
self.register_buffer('pos_encoding', pos_encoding) # 固定参数,不需要train
# 防过拟合
self.dropout = nn.Dropout(dropout)
def forward(self, x): # x: (batch_size,seq_len)
x = self.seq_emb(x) # x: (batch_size,seq_len,emb_size)
x = x + self.pos_encoding.unsqueeze(0)[:, :x.size()[1], :] # x: (batch_size,seq_len,emb_size)
return self.dropout(x)
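A quick sanity check with made-up sizes; token ids go in, embeddings with positions added come out:

emb = EmbedingWithPosition(vocab_size=1000, embeding_size=64)
tokens = torch.randint(0, 1000, (2, 12))  # (batch_size, seq_len)
print(emb(tokens).shape)  # torch.Size([2, 12, 64])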
class Encoder_Block(nn.Module):
    def __init__(self, q_k_size, v_size, num_head, mlp_size, embeding_size):
        super().__init__()
        self.attention = MutiHeadSelfAttention(embeding_size, q_k_size, q_k_size, v_size, num_head)
        self.linear = nn.Linear(num_head * v_size, embeding_size)
        self.norm_1 = nn.LayerNorm(embeding_size)
        self.feedforward = nn.Sequential(
            nn.Linear(embeding_size, mlp_size),
            nn.ReLU(),
            nn.Linear(mlp_size, embeding_size)
        )
        self.norm_2 = nn.LayerNorm(embeding_size)  # normalizes over the last dim
        # every block outputs embeding_size features, so Encoder blocks can be stacked

    def forward(self, x):
        attention = self.attention(x, x, x)
        linear = self.linear(attention)
        norm_1 = self.norm_1(linear + x)  # residual connection, then LayerNorm
        out = self.feedforward(norm_1)
        res = self.norm_2(out + norm_1)   # second residual + LayerNorm
        return res
class Encoder(nn.Module):
    def __init__(self, vocab_size, embeding_size, num_layer, q_k_size, v_size, num_head, mlp_size):
        super().__init__()
        self.embed_position = EmbedingWithPosition(vocab_size, embeding_size)
        self.model = nn.ModuleList()
        for i in range(num_layer):
            self.model.add_module("Encoder" + str(i), Encoder_Block(q_k_size, v_size, num_head, mlp_size, embeding_size))

    def forward(self, x):
        # NOTE: padding masks are omitted because MutiHeadSelfAttention above does not accept a mask;
        # with one, you would build (x == PAD_IDX) -> (batch_size, 1, seq_len), expand it to
        # (batch_size, seq_len, seq_len), and add -inf at masked positions before the softmax.
        x = self.embed_position(x)
        for block in self.model:
            x = block(x)  # x: (batch_size, seq_len, embeding_size)
        return x
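A minimal end-to-end sketch with made-up hyperparameters, assuming the classes above are in one file. num_head*v_size = 4*32 = 128 matches embeding_size, so the blocks stack cleanly:

if __name__ == '__main__':
    enc = Encoder(vocab_size=1000, embeding_size=128, num_layer=3,
                  q_k_size=32, v_size=32, num_head=4, mlp_size=256)
    tokens = torch.randint(0, 1000, (2, 16))  # (batch_size, seq_len)
    print(enc(tokens).shape)  # torch.Size([2, 16, 128])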