Deconv的python实现

前段时间学习 cs231n 时,写了几个 naive 版本的 conv 和 deconv(卷积层反向传播)函数,今天拿出来跟大家分享。

conv

def conv(X, w, b, conv_param):
    """
    Convolve one multi-channel image with one filter (naive loops).

    Inputs:
    - X: Input of shape (C, H, W)
    - w: Filter of shape (C, HH, WW); HH and WW may differ
    - b: Scalar bias added to every output element
    - conv_param: A dictionary with the following keys:
      - 'stride': Step between adjacent receptive fields (both directions)
      - 'pad'   : Number of zeros padded on each side of H and W

    Returns:
    - Y: Output of shape (H_o, W_o), where
           H_o = 1 + (H + 2 * pad - HH) // stride
           W_o = 1 + (W + 2 * pad - WW) // stride
    """
    _, H, W = X.shape
    _, HH, WW = w.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    # Zero-pad the spatial axes only; the channel axis is left untouched.
    npad = ((0, 0), (pad, pad), (pad, pad))
    X_pad = np.pad(X, pad_width=npad, mode='constant', constant_values=0)

    H_o = 1 + (H + 2 * pad - HH) // stride
    W_o = 1 + (W + 2 * pad - WW) // stride

    Y = np.zeros((H_o, W_o))
    for i in range(H_o):
        top = i * stride
        for j in range(W_o):
            left = j * stride
            # The window must be HH tall and WW wide so that it matches the
            # filter shape (using HH for the width breaks non-square filters).
            window = X_pad[:, top:top + HH, left:left + WW]
            Y[i, j] = np.sum(window * w) + b

    return Y
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and width
    W. We convolve each input with F different filters, where each filter spans
    all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the horizontal and vertical directions.
      - 'pad'   : The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
                 H' = 1 + (H + 2 * pad - HH) // stride
                 W' = 1 + (W + 2 * pad - WW) // stride
    - cache: (x, w, b, conv_param)
    """
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    H_o = 1 + (H + 2 * pad - HH) // stride
    W_o = 1 + (W + 2 * pad - WW) // stride

    # Pre-allocate the result so that the shape is (N, F, H', W') even when
    # N == 0 or F == 0 (stacking nested Python lists loses those axes).
    out = np.zeros((N, F, H_o, W_o))
    for n in range(N):
        for f in range(F):
            # One image against one filter gives one (H', W') activation map.
            out[n, f] = conv(x[n], w[f], b[f], conv_param)

    cache = (x, w, b, conv_param)
    return out, cache

deconv(即卷积层的反向传播)

def conv_backward_naive(dout, cache):
    """
    Naive backward pass of a convolutional layer, written with explicit loops.

    Inputs:
    - dout: Upstream derivatives, indexed as (n, f, i, j) over images,
      filters and output positions.
    - cache: The tuple (x, w, b, conv_param) produced by conv_forward_naive.

    Returns a tuple of:
    - dx: Gradient with respect to x (padding border removed)
    - dw: Gradient with respect to w, same shape as w
    - db: Gradient with respect to b, shape (F,)
    """
    x, w, b, conv_param = cache
    pad = conv_param['pad']
    stride = conv_param['stride']

    N, _, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, H_o, W_o = dout.shape

    # Work on a zero-padded copy of the input, as the forward pass does.
    border = ((0, 0), (0, 0), (pad, pad), (pad, pad))
    x_pad = np.pad(x, pad_width=border, mode='constant', constant_values=0)

    # Bias gradient: each filter collects all upstream values of its channel.
    db = np.zeros((F))
    for k in range(F):
        db[k] = np.sum(dout[:, k, :, :])

    dw = np.zeros(w.shape)
    dx_pad = np.zeros(x_pad.shape)

    # Visit every (image, filter, output row, output column) in row-major
    # order, the same order as four nested loops over those ranges.
    for n, f, i, j in np.ndindex(N, F, H_o, W_o):
        top = i * stride
        left = j * stride
        grad = dout[n, f, i, j]

        # The receptive field that produced out[n, f, i, j].
        patch = x_pad[n, :, top:(top + HH), left:(left + WW)]
        dw[f] += grad * patch
        dx_pad[n, :, top:(top + HH), left:(left + WW)] += w[f] * grad

    # Drop the padding border to return to the shape of x.
    dx = dx_pad[:, :, pad: H + pad, pad: W + pad]
    return dx, dw, db

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
实现灰白图像上色可以使用深度学习的方法,具体可以使用卷积神经网络(Convolutional Neural Network, CNN)实现。以下是基于 PyTorch 框架的实现代码,假设原图像是灰度图像,需要将其上色为彩色图像。

首先导入必要的库:

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
```

定义数据集类,读取灰度图像和对应的彩色图像:

```python
class ImageDataset(Dataset):
    def __init__(self, data_path, transform=None):
        self.data_path = data_path
        self.transform = transform
        self.image_filenames = os.listdir(os.path.join(self.data_path, 'gray'))

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, index):
        gray_image = Image.open(os.path.join(self.data_path, 'gray', self.image_filenames[index]))
        color_image = Image.open(os.path.join(self.data_path, 'color', self.image_filenames[index]))
        if self.transform:
            gray_image = self.transform(gray_image)
            color_image = self.transform(color_image)
        return gray_image, color_image
```

定义卷积神经网络模型,包含一个预处理层(将灰度图像转换为 RGB 图像)、若干个卷积层和反卷积层(上采样)、一个输出层(将图像的像素值限制在 [0, 1] 区间内):

```python
class ColorNet(nn.Module):
    def __init__(self):
        super(ColorNet, self).__init__()
        self.preprocess = nn.Sequential(
            nn.Conv2d(1, 3, kernel_size=1, stride=1),
            nn.BatchNorm2d(3),
            nn.ReLU(inplace=True)
        )
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        )
        self.conv7 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        )
        self.deconv1 = nn.Sequential(
            nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )
        self.deconv2 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        self.deconv3 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.deconv4 = nn.Sequential(
            nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.preprocess(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.deconv1(x)
        x = self.deconv2(x)
        x = self.deconv3(x)
        x = self.deconv4(x)
        return x
```

定义训练函数:

```python
def train(model, criterion, optimizer, dataloader, device):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0
```

定义测试函数,用于测试模型在测试集上的表现:

```python
def test(model, criterion, dataloader, device):
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            test_loss += criterion(outputs, labels).item()
    test_loss /= len(dataloader.dataset)
    print('Test Loss: {:.6f}'.format(test_loss))
```

定义训练参数和训练过程:

```python
data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])
train_dataset = ImageDataset('data/train', transform=data_transforms)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataset = ImageDataset('data/test', transform=data_transforms)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = ColorNet().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0002)
for epoch in range(50):
    train(model, criterion, optimizer, train_dataloader, device)
    test(model, criterion, test_dataloader, device)
```

最后,使用训练好的模型对一张灰度图像进行上色:

```python
gray_image = Image.open('test_gray.png').convert('L')
gray_image_transformed = data_transforms(gray_image).unsqueeze(0).to(device)
color_image = model(gray_image_transformed)
color_image = color_image.squeeze(0).cpu().detach().numpy().transpose(1, 2, 0)
color_image = (color_image + 1) / 2  # 将像素值从 [-1, 1] 转换为 [0, 1]
Image.fromarray((color_image * 255).astype('uint8')).show()
```

其中 `'test_gray.png'` 是一张灰度图像的文件名。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ReLuJie

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值