这个项目将使用 PyTorch 实现图像风格迁移(Neural Style Transfer),让一张图片看起来具有另一张图片的“艺术风格”。
🔧 开发环境建议
Python 3.8+
PyTorch(pip install torch torchvision)
PIL(pip install pillow)
CUDA(可选,但建议有 GPU)
🗂️ 项目结构示例
bash
style_transfer/
├── style.jpg # 风格图像(如:星夜)
├── content.jpg # 内容图像(如:你的自拍)
└── style_transfer.py # 主程序
✅ Step-by-step:图像风格迁移项目详解(含代码)
✅ Step 1:导入库
python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import copy
import matplotlib.pyplot as plt
✅ Step 2:设备设置(使用 GPU 优先)
python
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
✅ Step 3:图像加载与预处理函数
python
def load_image(img_path, max_size=400):
    """Load an image file as a normalized 4-D tensor on ``device``.

    The image is converted to RGB, shrunk (aspect ratio preserved) so that
    its longer edge is at most ``max_size`` pixels, turned into a tensor,
    normalized per channel, and given a leading batch dimension.
    Images that already fit within ``max_size`` are never upscaled.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: Upper bound, in pixels, for the longer image edge.

    Returns:
        A float tensor of shape ``(1, 3, H, W)`` located on ``device``.
    """
    # Close the file handle as soon as the pixel data has been converted.
    with Image.open(img_path) as raw:
        image = raw.convert('RGB')

    # transforms.Resize(int) matches the *shorter* edge, which lets the longer
    # edge exceed max_size and even upscales small images. Passing an explicit
    # (height, width) pair makes max_size a true upper bound.
    width, height = image.size
    scale = min(1.0, max_size / max(width, height))
    new_size = (max(1, round(height * scale)), max(1, round(width * scale)))

    in_transform = transforms.Compose([
        transforms.Resize(new_size),
        transforms.ToTensor(),
        # Channel statistics expected by torchvision's pretrained models.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    image = in_transform(image).unsqueeze(0)  # add the batch dimension
    return image.to(device)
✅ Step 4:定义函数来展示图像
python
def im_convert(tensor):
    """Convert a normalized ``(1, 3, H, W)`` image tensor to a PIL image.

    Args:
        tensor: Image tensor normalized with the same per-channel mean/std
            that ``load_image`` applies; may live on any device and may
            require grad.

    Returns:
        A ``PIL.Image.Image`` with pixel values restored to the displayable
        range.
    """
    image = tensor.detach().to("cpu").clone().squeeze(0)

    # Undo transforms.Normalize: without this step the normalized values
    # (many of them outside [0, 1]) are fed straight into ToPILImage and the
    # saved picture has wrong colours.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    image = (image * std + mean).clamp(0, 1)

    return transforms.ToPILImage()(image)
✅ Step 5:加载内容图像和风格图像
python
# load_image() already returns tensors on `device`; the trailing .to(device)
# is therefore a no-op that is kept only as a safeguard.
content = load_image(img_path="content.jpg").to(device)
style = load_image(img_path="style.jpg").to(device)
✅ Step 6:加载预训练的 VGG 模型(VGG19)
python
# Only the convolutional feature extractor of VGG19 is needed. Its weights are
# frozen: the network is never trained here, and leaving them trainable makes
# the pre-computed content/style features part of an autograd graph that is
# freed by the first backward() call, breaking the second optimisation step.
vgg = models.vgg19(pretrained=True).features.to(device).eval().requires_grad_(False)
我们只取模型中的某些层用于提取风格与内容特征。
✅ Step 7:定义提取内容和风格特征的函数
python
def get_features(image, model, layers=None):
    """Run ``image`` through ``model`` and collect selected activations.

    Args:
        image: Input tensor fed to the first child module of ``model``.
        model: Module whose direct children are applied one after another
            in registration order (e.g. ``vgg19().features``).
        layers: Mapping from child-module name (the string index inside a
            ``Sequential``) to the label under which its output is stored.
            Defaults to the VGG19 layers used for style and content.

    Returns:
        Dict mapping each label to the tensor produced by that child module.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content representation
            '28': 'conv5_1',
        }

    features = {}
    x = image
    # Iterate the raw child registry so that every entry is applied exactly
    # once, in order, even if the same module instance is registered twice.
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
✅ Step 8:定义 Gram 矩阵函数(风格提取核心)
python
def gram_matrix(tensor):
    """Return the Gram matrix of a 4-D feature map.

    Args:
        tensor: Feature map of shape ``(b, d, h, w)``.

    Returns:
        A ``(b * d, b * d)`` tensor of inner products between the flattened
        channel maps; for the usual single-image case (``b == 1``) this is
        the ``(d, d)`` channel-correlation matrix.
    """
    b, d, h, w = tensor.size()
    # reshape (instead of view) also accepts non-contiguous inputs, and
    # folding the batch into the row dimension avoids a shape error when
    # b != 1 while leaving the b == 1 result unchanged.
    flat = tensor.reshape(b * d, h * w)
    return torch.mm(flat, flat.t())
✅ Step 9:提取内容和风格特征
python
# The reference features are constants of the optimisation, so they are built
# without autograd tracking. Otherwise they keep a graph through the VGG
# weights that the first backward() frees, and the next step fails.
with torch.no_grad():
    content_features = get_features(content, vgg)
    style_features = get_features(style, vgg)
    # Pre-compute one Gram matrix per captured style layer.
    style_grams = {
        layer: gram_matrix(feature)
        for layer, feature in style_features.items()
    }
✅ Step 10:初始化目标图像(从内容图像复制)
python
# Start from a copy of the content image. requires_grad_ is applied last so
# that `target` is always a leaf tensor: calling .to(device) afterwards would
# return a non-leaf copy whenever a device transfer actually happens, which
# the optimizer rejects.
target = content.clone().to(device).requires_grad_(True)
✅ Step 11:设置超参数
python
# Relative contribution of each style layer to the style loss.
style_weights = dict(
    conv1_1=1.0,
    conv2_1=0.75,
    conv3_1=0.2,
    conv4_1=0.2,
    conv5_1=0.2,
)

# Global weights of the two loss terms.
content_weight = 1e4  # alpha
style_weight = 1e2  # beta
✅ Step 12:设置优化器
python
# The only optimised "parameter" is the target image itself.
optimizer = optim.Adam(params=[target], lr=3e-3)
✅ Step 13:训练模型
python
steps = 300
for i in range(1, steps + 1):
    target_features = get_features(target, vgg)

    # Content loss: mean squared difference at the content layer.
    content_loss = torch.mean(
        (target_features['conv4_2'] - content_features['conv4_2']) ** 2
    )

    # Style loss: weighted Gram-matrix differences over the style layers.
    style_loss = 0
    for layer in style_weights:
        target_feature = target_features[layer]
        target_gram = gram_matrix(target_feature)
        style_gram = style_grams[layer]
        layer_style_loss = style_weights[layer] * torch.mean(
            (target_gram - style_gram) ** 2
        )
        # Scale by the squared channel count of the layer.
        style_loss += layer_style_loss / (target_feature.shape[1] ** 2)

    total_loss = content_weight * content_loss + style_weight * style_loss

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if i % 50 == 0:
        print(f"Step {i}, Total loss: {total_loss.item():.4f}")
✅ Step 14:保存并显示结果图像
python
# Turn the optimised tensor into a PIL image, write it to disk, and open it
# in the system's default image viewer.
final_img = im_convert(target)
final_img.save(fp="result.jpg")
final_img.show()
🖼️ 示例效果
将自拍(content.jpg)与《星夜》(style.jpg)结合,输出一张油画风格的人像。
附上完整代码:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import copy
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Image loading and preprocessing
def load_image(img_path, max_size=400):
    """Load an image file as a normalized 4-D tensor on ``device``.

    The image is converted to RGB, shrunk (aspect ratio preserved) so that
    its longer edge is at most ``max_size`` pixels, turned into a tensor,
    normalized per channel, and given a leading batch dimension.
    Images that already fit within ``max_size`` are never upscaled.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: Upper bound, in pixels, for the longer image edge.

    Returns:
        A float tensor of shape ``(1, 3, H, W)`` located on ``device``.
    """
    # Close the file handle as soon as the pixel data has been converted.
    with Image.open(img_path) as raw:
        image = raw.convert('RGB')

    # transforms.Resize(int) matches the *shorter* edge, which lets the longer
    # edge exceed max_size and even upscales small images. Passing an explicit
    # (height, width) pair makes max_size a true upper bound.
    width, height = image.size
    scale = min(1.0, max_size / max(width, height))
    new_size = (max(1, round(height * scale)), max(1, round(width * scale)))

    in_transform = transforms.Compose([
        transforms.Resize(new_size),
        transforms.ToTensor(),
        # Channel statistics expected by torchvision's pretrained models.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])
    image = in_transform(image).unsqueeze(0)  # add the batch dimension
    return image.to(device)
# Convert a tensor back into a viewable image
def im_convert(tensor):
    """Convert a normalized ``(1, 3, H, W)`` image tensor to a PIL image.

    Args:
        tensor: Image tensor normalized with the same per-channel mean/std
            that ``load_image`` applies; may live on any device and may
            require grad.

    Returns:
        A ``PIL.Image.Image`` with pixel values restored to the displayable
        range.
    """
    image = tensor.detach().to("cpu").clone().squeeze(0)

    # Undo transforms.Normalize: without this step the normalized values
    # (many of them outside [0, 1]) are fed straight into ToPILImage and the
    # saved picture has wrong colours.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    image = (image * std + mean).clamp(0, 1)

    return transforms.ToPILImage()(image)
# Feature extraction
def get_features(image, model, layers=None):
    """Run ``image`` through ``model`` and collect selected activations.

    Args:
        image: Input tensor fed to the first child module of ``model``.
        model: Module whose direct children are applied one after another
            in registration order (e.g. ``vgg19().features``).
        layers: Mapping from child-module name (the string index inside a
            ``Sequential``) to the label under which its output is stored.
            Defaults to the VGG19 layers used for style and content.

    Returns:
        Dict mapping each label to the tensor produced by that child module.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content layer
            '28': 'conv5_1',
        }

    features = {}
    x = image
    # Iterate the raw child registry so that every entry is applied exactly
    # once, in order, even if the same module instance is registered twice.
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
# Gram matrix
def gram_matrix(tensor):
    """Return the Gram matrix of a 4-D feature map.

    Args:
        tensor: Feature map of shape ``(b, d, h, w)``.

    Returns:
        A ``(b * d, b * d)`` tensor of inner products between the flattened
        channel maps; for the usual single-image case (``b == 1``) this is
        the ``(d, d)`` channel-correlation matrix.
    """
    b, d, h, w = tensor.size()
    # reshape (instead of view) also accepts non-contiguous inputs, and
    # folding the batch into the row dimension avoids a shape error when
    # b != 1 while leaving the b == 1 result unchanged.
    flat = tensor.reshape(b * d, h * w)
    return torch.mm(flat, flat.t())
# Program entry point
def main():
    """Run style transfer on content.jpg / style.jpg and save result.jpg.

    Loads both images from the working directory, optimises a copy of the
    content image so that its VGG19 features match the content image's
    content features and the style image's Gram matrices, and writes the
    outcome to ``result.jpg``.
    """
    # Load the images.
    content = load_image("content.jpg")
    style = load_image("style.jpg")

    # Load the pretrained feature extractor. Its weights are frozen: the
    # network is never trained here, and trainable weights would tie the
    # reference features below to an autograd graph that the first
    # backward() frees, breaking the second optimisation step.
    vgg = models.vgg19(pretrained=True).features.to(device).eval()
    vgg.requires_grad_(False)

    # Reference features are constants of the optimisation, so no autograd
    # tracking is needed while computing them.
    with torch.no_grad():
        content_features = get_features(content, vgg)
        style_features = get_features(style, vgg)
        style_grams = {
            layer: gram_matrix(feature)
            for layer, feature in style_features.items()
        }

    # requires_grad_ is applied last so that `target` is always a leaf tensor
    # the optimizer can update.
    target = content.clone().to(device).requires_grad_(True)

    # Weight settings.
    style_weights = {
        'conv1_1': 1.0,
        'conv2_1': 0.75,
        'conv3_1': 0.2,
        'conv4_1': 0.2,
        'conv5_1': 0.2,
    }
    content_weight = 1e4
    style_weight = 1e2

    optimizer = optim.Adam([target], lr=0.003)
    steps = 300

    print("开始风格迁移...")
    for i in range(1, steps + 1):
        target_features = get_features(target, vgg)

        # Content loss: mean squared difference at the content layer.
        content_loss = torch.mean(
            (target_features['conv4_2'] - content_features['conv4_2']) ** 2
        )

        # Style loss: weighted Gram-matrix differences over the style layers.
        style_loss = 0
        for layer in style_weights:
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            style_gram = style_grams[layer]
            layer_style_loss = style_weights[layer] * torch.mean(
                (target_gram - style_gram) ** 2
            )
            # Scale by the squared channel count of the layer.
            style_loss += layer_style_loss / (target_feature.shape[1] ** 2)

        total_loss = content_weight * content_loss + style_weight * style_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if i % 50 == 0:
            print(f"Step {i}/{steps}, Total loss: {total_loss.item():.4f}")

    # Save the result.
    result = im_convert(target)
    result.save("result.jpg")
    print("风格迁移完成!结果保存在 result.jpg")
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
✅ 使用说明
🖼️ 准备:
把你的内容图命名为 content.jpg
把你的风格图命名为 style.jpg
放在与 style_transfer.py 同一个目录下
▶️ 运行:
bash
python style_transfer.py
🖼️ 输出:
运行成功后,生成的图像将保存在:
result.jpg