style-transfer 是一个把你的图片转换成指定艺术风格的开源项目,是论文 A Neural Algorithm of Artistic Style 的复现。
def _compute_reprs(net_in, net, layers_style, layers_content, gram_scale=1):
"""
Computes representation matrices for an image.
"""
# input data and forward pass
(repr_s, repr_c) = ({}, {})
net.blobs["data"].data[0] = net_in
net.forward()
# loop through combined set of layers
for layer in set(layers_style)|set(layers_content):
F = net.blobs[layer].data[0].copy()
F.shape = (F.shape[0], -1)
repr_c[layer] = F
if layer in layers_style:
repr_s[layer] = sgemm(gram_scale, F, F.T)
return repr_s, repr_c
首先通过正向传播计算出各层的 feature map,再把特征矩阵展平后保存并返回。其中网络的权重定义如下:
# Per-layer loss weights for VGG-16: the content term uses a single layer
# (conv4_2); the style term spreads equal weight (0.2) over five conv*_1 layers.
VGG16_WEIGHTS = {"content": {"conv4_2": 1},
"style": {"conv1_1": 0.2,
"conv2_1": 0.2,
"conv3_1": 0.2,
"conv4_1": 0.2,
"conv5_1": 0.2}}
内容只用了一层 conv4_2,风格用了 5 层。我们可以再用 keys() 来获取层名:
# Layer-name lists for the style and content terms (the keys of each weight dict).
layers_style = weights["style"].keys()
layers_content = weights["content"].keys()
========================================================================================================
def _compute_style_grad(F, G, G_style, layer):
"""
Computes style gradient and loss from activation features.
"""
# compute loss and gradient
(Fl, Gl) = (F[layer], G[layer])
c = Fl.shape[0]**-2 * Fl.shape[1]**-2
El = Gl - G_style[layer]
loss = c/4 * (El**2).sum()
grad = c * sgemm(1.0, El, Fl) * (Fl>0)
return loss, grad
这个是完成风格梯度以及loss的计算。
def _compute_content_grad(F, F_content, layer):
"""
Computes content gradient and loss from activation features.
"""
# compute loss and gradient
Fl = F[layer]
El = Fl - F_content[layer]
loss = (El**2).sum() / 2
grad = El * (Fl>0)
return loss, grad
这一函数完成内容梯度以及 loss 的计算:内容损失就是特征图差值的平方和(除以 2),梯度即该差值(经 ReLU 掩码)。
========================================================================================================
def _make_noise_input(self, init):
"""
Creates an initial input (generated) image.
"""
# specify dimensions and create grid in Fourier domain
dims = tuple(self.net.blobs["data"].data.shape[2:]) + \
(self.net.blobs["data"].data.shape[1], )
grid = np.mgrid[0:dims[0], 0:dims[1]]
# create frequency representation for pink noise
Sf = (grid[0] - (dims[0]-1)/2.0) ** 2 + \
(grid[1] - (dims[1]-1)/2.0) ** 2
Sf[np.where(Sf == 0)] = 1
Sf = np.sqrt(Sf)
Sf = np.dstack((Sf**int(init),)*dims[2])
# apply ifft to create pink noise and normalize
ifft_kernel = np.cos(2*np.pi*np.random.randn(*dims)) + \
1j*np.sin(2*np.pi*np.random.randn(*dims))
img_noise = np.abs(ifftn(Sf * ifft_kernel))
img_noise -= img_noise.min()
img_noise /= img_noise.max()
# preprocess the pink noise image
x0 = self.transformer.preprocess("data", img_noise)
return x0
制造最开始的噪声输入,我们就是在这个上面进行不断的拟合~
========================================================================================================
def transfer_style(self, img_style, img_content, length=512, ratio=1e5,
                   n_iter=512, init="-1", verbose=False, callback=None):
    """
    Transfers the style of the artwork to the input image.

    :param numpy.ndarray img_style:
        A style image with the desired target style.
    :param numpy.ndarray img_content:
        A content image in floating point, RGB format.
    :param int length:
        Target size for the longer edge when rescaling the inputs.
    :param float ratio:
        Style/content weighting ratio forwarded to the optimizer args.
    :param int n_iter:
        Maximum number of L-BFGS-B iterations.
    :param init:
        Initial image: an ndarray, the string "content", "mixed"
        (95% content + 5% style), or a string/number used as the noise
        exponent for a generated noise image.
    :param bool verbose:
        If True, let the optimizer print progress (disables the progress bar).
    :param function callback:
        A callback function, which takes images at iterations.
    :returns: number of optimizer iterations performed (``res.nit``).
    """
    # assume that convnet input is square
    orig_dim = min(self.net.blobs["data"].shape[2:])

    # rescale the images so they fit both the requested length and the net
    scale = max(length / float(max(img_style.shape[:2])),
                orig_dim / float(min(img_style.shape[:2])))
    img_style = rescale(img_style, STYLE_SCALE * scale)
    scale = max(length / float(max(img_content.shape[:2])),
                orig_dim / float(min(img_content.shape[:2])))
    img_content = rescale(img_content, scale)

    # compute style representations (Gram matrices)
    self._rescale_net(img_style)
    layers = self.weights["style"].keys()
    net_in = self.transformer.preprocess("data", img_style)
    gram_scale = float(img_content.size) / img_style.size
    # NOTE(review): gram_scale is computed above but gram_scale=1 is passed,
    # matching upstream behavior; left unchanged to preserve output.
    G_style = _compute_reprs(net_in, self.net, layers, [],
                             gram_scale=1)[0]

    # compute content representations (raw feature maps)
    self._rescale_net(img_content)
    layers = self.weights["content"].keys()
    net_in = self.transformer.preprocess("data", img_content)
    F_content = _compute_reprs(net_in, self.net, [], layers)[1]

    # generate initial net input
    # "content" = content image, see kaishengtai/neuralart
    if isinstance(init, np.ndarray):
        img0 = self.transformer.preprocess("data", init)
    elif init == "content":
        img0 = self.transformer.preprocess("data", img_content)
    elif init == "mixed":
        img0 = 0.95 * self.transformer.preprocess("data", img_content) + \
               0.05 * self.transformer.preprocess("data", img_style)
    else:
        img0 = self._make_noise_input(init)

    # compute data bounds: one (min, max) pair per pixel of each channel.
    # BUGFIX: use integer division -- img0.size / 3 is a float under
    # Python 3 and multiplying a list by a float raises TypeError.
    channel_size = img0.size // 3
    data_min = -self.transformer.mean["data"][:, 0, 0]
    data_max = data_min + self.transformer.raw_scale["data"]
    data_bounds = [(data_min[0], data_max[0])] * channel_size + \
                  [(data_min[1], data_max[1])] * channel_size + \
                  [(data_min[2], data_max[2])] * channel_size

    # optimization params
    grad_method = "L-BFGS-B"
    reprs = (G_style, F_content)
    minfn_args = {
        "args": (self.net, self.weights, self.layers, reprs, ratio),
        "method": grad_method, "jac": True, "bounds": data_bounds,
        "options": {"maxcor": 8, "maxiter": n_iter, "disp": verbose}
    }

    # optimize; self.callback wraps the user-supplied callback stored above
    self._callback = callback
    minfn_args["callback"] = self.callback
    # single minimize call; the progress bar just brackets it when enabled
    use_pbar = self.use_pbar and not verbose
    if use_pbar:
        self._create_pbar(n_iter)
        self.pbar.start()
    res = minimize(style_optfn, img0.flatten(), **minfn_args).nit
    if use_pbar:
        self.pbar.finish()
    return res
开始风格迁移~这段代码的注释写得很详细,建议对照上面的几个辅助函数自己逐行阅读一遍。