"""
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# 指定支持中文的字体
plt.rcParams['font.sans-serif'] = ['SimHei'] # Windows 系统下使用黑体
plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题
# 定义代码
code ="""
__author__ = 'Eric'
#导包
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Input,GlobalMaxPooling2D,Dense,Flatten,Dropout,Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import L2
from tensorflow.keras.callbacks import ReduceLROnPlateau#用来做学习率回调
from tensorflow.keras.initializers import RandomNormal,Zeros#参数初始化
import os
import sys
'''
1、数据预处理
'''
'''
数据集格式:
dataset/
├── train/
│ ├── class1/
│ │ ├── img1.jpg
│ │ ├── img2.jpg
│ │ └── ...
│ ├── class2/
│ │ ├── img1.jpg
│ │ ├── img2.jpg
│ │ └── ...
│ └── ...
└── validation/
├── class1/
│ ├── img1.jpg
│ ├── img2.jpg
│ └── ...
├── class2/
│ ├── img1.jpg
│ ├── img2.jpg
│ └── ...
└── ...
'''
#TensorFlow 默认会预分配显存,但你可以通过设置 allow_growth 为 True,让 TensorFlow 按需分配显存。
# 设置动态内存分配
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
print("显存动态分配成功!")
except RuntimeError as e:
print(e)
#①指定GPU使用量,占用GPU90%的显存
for gpu in gpus:
tf.config.experimental.per_process_gpu_memory_fraction = 0.9
'''
# 全局设置默认设备为 CPU
tf.config.experimental.set_visible_devices([], 'GPU')
'''
#TF_GPU_ALLOCATOR=cuda_malloc_async,改善显存环境,减少显存碎片化
#os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
def loadDataset():
# 使用 CPU 进行图像预处理
with tf.device('/cpu:0'):
# 定义数据集路径
#train_dir = 'dataset/train'
#validation_dir = 'dataset/validation'
train_dir = r"D:\2025College Student Innovation and Entrepreneurship Project\project\VGG\dataset\data\train"
validation_dir = r"D:\2025College Student Innovation and Entrepreneurship Project\project\VGG\dataset\data\validation"
test_dir = r"D:\2025College Student Innovation and Entrepreneurship Project\project\VGG\dataset\data\test"
'''
#检查是否为UTF-8编码格式
def is_valid_utf8(s):
try:
s.encode('utf-8')
return True
except UnicodeEncodeError:
return False
# 检查训练集
for class_name in os.listdir(train_dir):
class_dir = os.path.join(train_dir, class_name)
if not is_valid_utf8(class_name):
print(f"Invalid UTF-8 class name: {class_name}")
for image_name in os.listdir(class_dir):
if not is_valid_utf8(image_name):
print(f"Invalid UTF-8 image name: {image_name}")
# 检查验证集
for class_name in os.listdir(validation_dir):
class_dir = os.path.join(validation_dir, class_name)
if not is_valid_utf8(class_name):
print(f"Invalid UTF-8 class name: {class_name}")
for image_name in os.listdir(class_dir):
if not is_valid_utf8(image_name):
print(f"Invalid UTF-8 image name: {image_name}")
print("OK")
'''
# 获取所有类别名称
class_names = sorted(os.listdir(train_dir)) # 假设训练集和验证集的类别名称一致
class_to_label = {class_name: idx for idx, class_name in enumerate(class_names)}#类别名称与索引标签映射
# 定义图像加载函数
def load_image(image_path, label):
with tf.device('/cpu:0'):
image = tf.io.read_file(image_path) # 读取图像文件
image = tf.image.decode_image(image, channels=3) # 解码图像
# 显式设置图像的形状
image.set_shape([None, None, 3])
return image, label
#解决TF对象不可哈希的问题,因此不能直接用作字典的键
def convert_label(image, label):
with tf.device('/cpu:0'):
label = label.numpy().decode('utf-8', errors='ignore') # 忽略解码错误
label = class_to_label[label] # 将类别名称转换为整数标签
return image, tf.constant(label)
#1、 定义动态调整图像尺寸的函数【单尺度,各向同性缩放】(S=256,S=384)
#先训练S=256,然后训练S=384(权重迁移),第二次训练学习率为0.001
'''
ratio=S/min(height,width)
new_height=round(height×ratio)
new_width=round(width×ratio)
'''
def dynamic_resize1(image, label, S=256,flip_rate = 0.5,max_delta=0.2):
with tf.device('/cpu:0'):
# 获取图像的原始尺寸
original_shape = tf.shape(image)
original_height = original_shape[0]
original_width = original_shape[1]
# 计算新的尺寸
# 保持宽高比例,使最短边等于 S
ratio = tf.cast(S, tf.float32) / tf.cast(tf.minimum(original_height, original_width), tf.float32)
new_height = tf.cast(tf.round(tf.cast(original_height, tf.float32) * ratio), tf.int32)
new_width = tf.cast(tf.round(tf.cast(original_width, tf.float32) * ratio), tf.int32)
# 调整图像尺寸
image = tf.image.resize(image, (new_height, new_width))
# 裁剪图像到 S x S
# 选择中心裁剪或随机裁剪
if S == 256 or S == 384:
# 随机裁剪
image = tf.image.random_crop(image, (224, 224, 3))
else:
# 中心裁剪
image = tf.image.resize_with_crop_or_pad(image, 224, 224)
# 随机决定是否翻转(50% 概率)
if tf.random.uniform(()) > flip_rate:
image = tf.image.flip_left_right(image)
#随机RGB颜色偏移(直接调整亮度)
image=tf.image.random_brightness(image, max_delta=max_delta)
return image, label
#2、 定义动态调整图像尺寸的函数【多尺度,各向同性缩放】(Smin=256,Smax=512)
#先用相同配置网络训练S=384,在用权重迁移训练多尺度网络
def dynamic_resize2(image, label, Smin=256,Smax=512):
with tf.device('/cpu:0'):
# 随机选择一个尺寸
new_size = tf.random.uniform(shape=[], minval=Smin, maxval=Smax, dtype=tf.int32)
# 调整图像尺寸
image = tf.image.resize(image, (new_size, new_size))
# 裁剪图像到 S x S
# 选择中心裁剪或随机裁剪
if Smin == 256 and Smax == 512:
# 随机裁剪
image = tf.image.random_crop(image, (224, 224, 3))
else:
# 中心裁剪
image = tf.image.resize_with_crop_or_pad(image, 224, 224)
return image, label
# 加载训练集
train_image_paths = []
train_labels = []
for class_name in class_names:
class_dir = os.path.join(train_dir, class_name)
for image_name in os.listdir(class_dir):
image_path = os.path.join(class_dir, image_name)
train_image_paths.append(image_path)
train_labels.append(class_name)
# 加载验证集
validation_image_paths = []
validation_labels = []
for class_name in class_names:
class_dir = os.path.join(validation_dir, class_name)
for image_name in os.listdir(class_dir):
image_path = os.path.join(class_dir, image_name)
validation_image_paths.append(image_path)
validation_labels.append(class_name)
#加载测试集
test_image_paths = []
test_labels = []
for class_name in class_names:
class_dir = os.path.join(test_dir, class_name)
for image_name in os.listdir(class_dir):
image_path = os.path.join(class_dir, image_name)
test_image_paths.append(image_path)
test_labels.append(class_name)
# 创建训练集数据集
train_dataset = tf.data.Dataset.from_tensor_slices((train_image_paths, train_labels))
#print(train_dataset)
# 使用 map 函数对每个路径调用 load_image 函数
train_dataset = train_dataset.map(load_image,num_parallel_calls=tf.data.AUTOTUNE)#map函数默认在图模式下运行
#print(train_dataset)
train_dataset = train_dataset.map(lambda x, y: dynamic_resize1(x, y), num_parallel_calls=tf.data.AUTOTUNE)
#print(train_dataset)
train_dataset = train_dataset.map(lambda x, y: tf.py_function(convert_label, [x, y], [tf.float32, tf.int32]), num_parallel_calls=tf.data.AUTOTUNE)
#print(train_dataset)#tf.data.Dataset 的 shapes 属性描述的是数据集中每个元素的形状,而不是整个数据集的综合形状。
# 创建验证集数据集
validation_dataset = tf.data.Dataset.from_tensor_slices((validation_image_paths, validation_labels))
validation_dataset = validation_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
validation_dataset = validation_dataset.map(lambda x, y: dynamic_resize1(x, y), num_parallel_calls=tf.data.AUTOTUNE)
validation_dataset = validation_dataset.map(lambda x, y: tf.py_function(convert_label, [x, y], [tf.float32, tf.int32]), num_parallel_calls=tf.data.AUTOTUNE)
# 创建测试集数据集
test_dataset = tf.data.Dataset.from_tensor_slices((test_image_paths, test_labels))
test_dataset = test_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.map(lambda x, y: dynamic_resize1(x, y), num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.map(lambda x, y: tf.py_function(convert_label, [x, y], [tf.float32, tf.int32]), num_parallel_calls=tf.data.AUTOTUNE)
#显式指定图片尺寸
train_dataset = train_dataset.map(
lambda x, y: (tf.reshape(x, (224, 224, 3)), tf.reshape(y,())),
num_parallel_calls=tf.data.AUTOTUNE
)
validation_dataset = validation_dataset.map(
lambda x, y: (tf.reshape(x, (224, 224, 3)), tf.reshape(y,())),
num_parallel_calls=tf.data.AUTOTUNE
)
test_dataset = test_dataset.map(
lambda x, y: (tf.reshape(x, (224, 224, 3)), tf.reshape(y,())),
num_parallel_calls=tf.data.AUTOTUNE
)
# 批量处理和预取
train_dataset = train_dataset.cache().shuffle(1000).batch(128).prefetch(buffer_size=tf.data.AUTOTUNE)
validation_dataset = validation_dataset.cache().batch(128).prefetch(buffer_size=tf.data.AUTOTUNE)
test_dataset = test_dataset.cache().batch(128).prefetch(buffer_size=tf.data.AUTOTUNE)
#print(train_dataset)
#print(validation_dataset)
#print(test_dataset)
#cache方法是 TensorFlow 中的一个优化工具,用于缓存数据集的内容。它可以在首次迭代数据集时将数据加载到内存中,从而在后续迭代中直接从内存中读取数据,而不是每次都从磁盘重新加载。这可以显著提高数据加载的效率,尤其是在数据集较大且多次迭代时。
for batch in train_dataset.take(1):
print(batch[0].shape, batch[1].shape) # 查看真实形状
'''
#print(train_dataset)
# 可视化加载的图像(训练集)
for images, labels in train_dataset.take(1):
for i in range(9):
plt.subplot(3, 3, i + 1)
plt.imshow(images[i].numpy().astype("uint8"))
plt.title(f'Label: {labels[i].numpy()}') # 显示整数标签
plt.axis("off")
plt.savefig("train_dataset.png")
plt.show()
break
# 可视化加载的图像(验证集)
for images, labels in validation_dataset.take(1):
for i in range(9):
plt.subplot(3, 3, i + 1)
plt.imshow(images[i].numpy().astype("uint8"))
plt.title(f'Label: {labels[i].numpy()}') # 显示整数标签
plt.axis("off")
plt.savefig("validation_dataset.png")
plt.show()
break
'''
return train_dataset,validation_dataset,test_dataset
"""
2、网络架构
"""
#定义A模型架构
#11 weight layers
#对于第一个网络,每个网络层权重用正态分布进行初始化,偏置全部初始化为0
def VGG_A(inputs):
#第一部分卷积
#Input 224x224x3
#64个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 224x224x64
#初始化偏置为0,使用均值为0方差为0.01(10**(-2)的正态分布进行初始化)
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(inputs)
#第一部分池化
#Input 224x224x64
#Output 112x112x64
x = MaxPooling2D()(x)
#第二部分卷积
#Input 112x112x64
#128个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 112x112x128
#初始化偏置为0,使用均值为0方差为0.01(10**(-2)的正态分布进行初始化)
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
#第二部分最大池化
#Input 112x112x128
#Output 56x56x128
x = MaxPooling2D()(x)
#第三部分卷积
#Input 56x56x128
#Output 56x56x256
#初始化偏置为0,使用均值为0方差为0.01(10**(-2)的正态分布进行初始化)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
#第三部分最大池化
#Input 56x56x256
#Output 28x28x256
x = MaxPooling2D()(x)
#第四部分卷积
#Input 28x28x256
#Output 28x28x512
#初始化偏置为0,使用均值为0方差为0.01(10**(-2)的正态分布进行初始化)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
#第四部分最大池化
#Input 28x28x512
#Output 14x14x512
x = MaxPooling2D()(x)
#第五部分卷积
#Input 14x14x512
#Output 14x14x512
#初始化偏置为0,使用均值为0方差为0.01(10**(-2)的正态分布进行初始化)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
#第五部分最大池化
#Input 14x14x512
#Output 7x7x512
x = MaxPooling2D()(x)
#初始化偏置为0,使用均值为0方差为0.01(10**(-2)的正态分布进行初始化)
#三个全连接层(其中有添加Flatten层作为数据展平,不过论文中没有写)
x = Flatten()(x)
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)),
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
x = tf.nn.dropout(x, 0.5) # 应用Dropout
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)),
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)#前两个全连接层添加系数为5*(10)**(-4)的L2正则化
x = tf.nn.dropout(x, 0.5) # 应用Dropout
outputs = Dense(1000,activation = 'softmax',
kernel_initializer=RandomNormal(mean=0.0, stddev=0.01),
bias_initializer=Zeros())(x)
#返回输出值
return outputs
#定义A_LRN模型架构
#11 weight layers
#对于其他网络,与前一网络重合部分沿用前一网络权重,其他网络层权重用正态分布进行初始化,偏置全部初始化为0
def VGG_A_LRN(inputs):
#第一部分卷积
#Input 224x224x3
#64个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 224x224x64
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(inputs)
#添加LRN归一化层
x = tf.nn.local_response_normalization(x)
#第一部分池化
#Input 224x224x64
#Output 112x112x64
x = MaxPooling2D()(x)
#第二部分卷积
#Input 112x112x64
#128个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 112x112x128
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第二部分最大池化
#Input 112x112x128
#Output 56x56x128
x = MaxPooling2D()(x)
#第三部分卷积
#Input 56x56x128
#Output 56x56x256
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第三部分最大池化
#Input 56x56x256
#Output 28x28x256
x = MaxPooling2D()(x)
#第四部分卷积
#Input 28x28x256
#Output 28x28x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第四部分最大池化
#Input 28x28x512
#Output 14x14x512
x = MaxPooling2D()(x)
#第五部分卷积
#Input 14x14x512
#Output 14x14x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第五部分最大池化
#Input 14x14x512
#Output 7x7x512
x = MaxPooling2D()(x)
#三个全连接层(其中有添加Flatten层作为数据展平,不过论文中没有写)
x = Flatten()(x)
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)#前两个全连接层添加系数为5*(10)**(-4)的L2正则化
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
outputs = Dense(1000,activation = 'softmax')(x)
#返回输出值
return outputs
#定义B模型架构
#13 weight layers
#对于其他网络,与前一网络重合部分沿用前一网络权重,其他网络层权重用正态分布进行初始化,偏置全部初始化为0
def VGG_B(inputs):
#第一部分卷积
#Input 224x224x3
#64个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 224x224x64
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(inputs)
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第一部分池化
#Input 224x224x64
#Output 112x112x64
x = MaxPooling2D()(x)
#第二部分卷积
#Input 112x112x64
#128个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 112x112x128
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第二部分最大池化
#Input 112x112x128
#Output 56x56x128
x = MaxPooling2D()(x)
#第三部分卷积
#Input 56x56x128
#Output 56x56x256
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第三部分最大池化
#Input 56x56x256
#Output 28x28x256
x = MaxPooling2D()(x)
#第四部分卷积
#Input 28x28x256
#Output 28x28x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第四部分最大池化
#Input 28x28x512
#Output 14x14x512
x = MaxPooling2D()(x)
#第五部分卷积
#Input 14x14x512
#Output 14x14x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第五部分最大池化
#Input 14x14x512
#Output 7x7x512
x = MaxPooling2D()(x)
#三个全连接层(其中有添加Flatten层作为数据展平,不过论文中没有写)
x = Flatten()(x)
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)#前两个全连接层添加系数为5*(10)**(-4)的L2正则化
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
outputs = Dense(1000,activation = 'softmax')(x)
#返回输出值
return outputs
#定义C模型架构
#16 weight layers
#对于其他网络,与前一网络重合部分沿用前一网络权重,其他网络层权重用正态分布进行初始化,偏置全部初始化为0
def VGG_C(inputs):
#第一部分卷积
#Input 224x224x3
#64个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 224x224x64
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(inputs)
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第一部分池化
#Input 224x224x64
#Output 112x112x64
x = MaxPooling2D()(x)
#第二部分卷积
#Input 112x112x64
#128个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 112x112x128
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第二部分最大池化
#Input 112x112x128
#Output 56x56x128
x = MaxPooling2D()(x)
#第三部分卷积
#Input 56x56x128
#Output 56x56x256
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(1,1),strides=(1,1),activation='relu',padding='same')(x)
#第三部分最大池化
#Input 56x56x256
#Output 28x28x256
x = MaxPooling2D()(x)
#第四部分卷积
#Input 28x28x256
#Output 28x28x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(1,1),strides=(1,1),activation='relu',padding='same')(x)
#第四部分最大池化
#Input 28x28x512
#Output 14x14x512
x = MaxPooling2D()(x)
#第五部分卷积
#Input 14x14x512
#Output 14x14x512
#1×1 卷积的参数量 = 输入通道 × 输出通道(再+bias)
#parameters = kernel_h × kernel_w × in_channels × out_channels + out_channels (bias)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(1,1),strides=(1,1),activation='relu',padding='same')(x)
#第五部分最大池化
#Input 14x14x512
#Output 7x7x512
x = MaxPooling2D()(x)
#三个全连接层(其中有添加Flatten层作为数据展平,不过论文中没有写)
x = Flatten()(x)
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)#前两个全连接层添加系数为5*(10)**(-4)的L2正则化
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
outputs = Dense(1000,activation = 'softmax')(x)
#返回输出值
return outputs
#定义D模型架构
#16 weight layers
#对于其他网络,与前一网络重合部分沿用前一网络权重,其他网络层权重用正态分布进行初始化,偏置全部初始化为0
def VGG_D(inputs):
#第一部分卷积
#Input 224x224x3
#64个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 224x224x64
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(inputs)
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第一部分池化
#Input 224x224x64
#Output 112x112x64
x = MaxPooling2D()(x)
#第二部分卷积
#Input 112x112x64
#128个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 112x112x128
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第二部分最大池化
#Input 112x112x128
#Output 56x56x128
x = MaxPooling2D()(x)
#第三部分卷积
#Input 56x56x128
#Output 56x56x256
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第三部分最大池化
#Input 56x56x256
#Output 28x28x256
x = MaxPooling2D()(x)
#第四部分卷积
#Input 28x28x256
#Output 28x28x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第四部分最大池化
#Input 28x28x512
#Output 14x14x512
x = MaxPooling2D()(x)
#第五部分卷积
#Input 14x14x512
#Output 14x14x512
#1×1 卷积的参数量 = 输入通道 × 输出通道(再+bias)
#parameters = kernel_h × kernel_w × in_channels × out_channels + out_channels (bias)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第五部分最大池化
#Input 14x14x512
#Output 7x7x512
x = MaxPooling2D()(x)
#三个全连接层(其中有添加Flatten层作为数据展平,不过论文中没有写)
x = Flatten()(x)
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)#前两个全连接层添加系数为5*(10)**(-4)的L2正则化
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
outputs = Dense(1000,activation = 'softmax')(x)
#返回输出值
return outputs
#定义E模型架构
#19 weight layers
#对于其他网络,与前一网络重合部分沿用前一网络权重,其他网络层权重用正态分布进行初始化,偏置全部初始化为0(权重迁移)
def VGG_E(inputs):
#第一部分卷积
#Input 224x224x3
#64个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 224x224x64
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(inputs)
x = Conv2D(filters=64,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第一部分池化
#Input 224x224x64
#Output 112x112x64
x = MaxPooling2D()(x)
#第二部分卷积
#Input 112x112x64
#128个卷积核,卷积核大小3x3,步长为1,激活函数为relu,填充至大小相等
#Output 112x112x128
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=128,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第二部分最大池化
#Input 112x112x128
#Output 56x56x128
x = MaxPooling2D()(x)
#第三部分卷积
#Input 56x56x128
#Output 56x56x256
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第三部分最大池化
#Input 56x56x256
#Output 28x28x256
x = MaxPooling2D()(x)
#第四部分卷积
#Input 28x28x256
#Output 28x28x512
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第四部分最大池化
#Input 28x28x512
#Output 14x14x512
x = MaxPooling2D()(x)
#第五部分卷积
#Input 14x14x512
#Output 14x14x512
#1×1 卷积的参数量 = 输入通道 × 输出通道(再+bias)
#parameters = kernel_h × kernel_w × in_channels × out_channels + out_channels (bias)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
x = Conv2D(filters=512,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same')(x)
#第五部分最大池化
#Input 14x14x512
#Output 7x7x512
x = MaxPooling2D()(x)
#三个全连接层(其中有添加Flatten层作为数据展平,不过论文中没有写)
x = Flatten()(x)
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
x = Dense(4096,activation = 'relu',kernel_regularizer=L2(5*(10)**(-4)))(x)#前两个全连接层添加系数为5*(10)**(-4)的L2正则化
x = Dropout(0.5)(x) # 添加 Dropout 层,丢弃比例为 0.5
outputs = Dense(1000,activation = 'softmax')(x)
#返回输出值
return outputs
"""
3、训练测试
"""
if __name__ == "__main__":
# 设置混合精度策略
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)
#定义模型架构
#定义输入层
#输入224x224x3的图片,批次为256(batch_size,height,width,channels)
inputs = Input(shape=(224,224,3),name='input_layer',batch_size=128)
#构建模型vgg_a
outputs = VGG_A(inputs)
vgg_a = Model(inputs = inputs, outputs = outputs)
#小批量梯度下降(SGD小批量随机梯度下降),学习率0.01(10**(-2)),动量下降动量0.9
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)#手动编辑优化器和学习率,后续的网络使用相同的优化器
#编译模型
vgg_a.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#打印模型架构并输出图片
vgg_a.summary()
tf.keras.utils.plot_model(vgg_a, to_file='VGG_A.png', show_shapes=True)
'''
#构建模型vgg_a_lrn
outputs = VGG_A_LRN(inputs)
vgg_a_lrn = Model(inputs = inputs, outputs = outputs)
#编译模型
vgg_a_lrn.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#打印模型架构并输出图片
vgg_a_lrn.summary()
tf.keras.utils.plot_model(vgg_a_lrn, to_file='VGG_A_LRN.png', show_shapes=True)
#构建模型vgg_b
outputs = VGG_B(inputs)
vgg_b = Model(inputs = inputs,outputs = outputs)
#编译模型
vgg_b.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#打印模型架构并输出图片
vgg_b.summary()
tf.keras.utils.plot_model(vgg_b, to_file='VGG_B.png', show_shapes=True)
#构建模型vgg_c
outputs = VGG_C(inputs)
vgg_c = Model(inputs = inputs,outputs = outputs)
#编译模型
vgg_c.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#打印模型架构并输出图片
vgg_c.summary()
tf.keras.utils.plot_model(vgg_c, to_file='VGG_C.png', show_shapes=True)
#构建模型vgg_d
outputs = VGG_D(inputs)
vgg_d = Model(inputs = inputs,outputs = outputs)
#编译模型
vgg_d.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#打印模型架构并输出图片
vgg_d.summary()
tf.keras.utils.plot_model(vgg_d, to_file='VGG_D.png', show_shapes=True)
#构建模型vgg_e
outputs = VGG_E(inputs)
vgg_e = Model(inputs = inputs,outputs = outputs)
#编译模型
vgg_e.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#打印模型架构并输出图片
vgg_e.summary()
tf.keras.utils.plot_model(vgg_e, to_file='VGG_E.png', show_shapes=True)
'''
'''
"""
#检查GPU是否有用
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))
#自动分配 GPU 内存,TensorFlow 会自动分配 GPU 内存,但可以通过以下代码手动设置:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
print(e)
#在训练模型时,TensorFlow 会自动利用 GPU 加速,无需额外设置
#使用混合精度训练,混合精度训练可以进一步加速训练过程并减少内存占用:(混合精度训练是一种在深度学习中常用的加速训练的技术,它结合了单精度浮点数(float32)和半精度浮点数(float16)的优势,以达到更快的训练速度和更低的内存占用,同时尽量减少精度损失。)
from tensorflow.keras.mixed_precision import experimental as mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)
"""
#训练模型
history = model.fit(train_data,
epochs=10,
batch_size=32,
validation_data=val_data)
#评估模型
test_loss, test_acc = model.evaluate(test_data)
print(f'Test accuracy: {test_acc}')
#保存模型
model.save('my_model.h5') # 保存整个模型
model.save_weights('my_model_weights.h5') # 只保存权重
'''
#数据预处理
train_dataset,validation_dataset,test_dataset=loadDataset()
# 定义 ReduceLROnPlateau 回调函数
reduce_lr = ReduceLROnPlateau(
monitor='val_accuracy', # 监控验证集的准确率
factor=0.1, # 学习率衰减因子
patience=5, # 等待多少个 epoch 后才调整学习率,连续3个epoch学习率没有提高就衰减
min_lr=1e-6, # 学习率的最小值
verbose=1 # 打印详细信息,在调整学习率的时候打印详细信息
)
#在fit中调用回调函数即可在相应监控状态执行
#训练模型
history = vgg_a.fit(train_dataset,
epochs=100,
validation_data=validation_dataset,
callbacks=[reduce_lr] # 添加回调函数
)
#评估模型
test_loss, test_acc = vgg_a.evaluate(test_dataset)
print(f'Test accuracy: {test_acc}')
#保存模型
vgg_a.save('my_model.h5') # 保存整个模型
#在每次训练或数据加载完成后,清理 TensorFlow 会话可以释放 GPU 内存。这有助于避免内存泄漏和重复分配问题。
tf.keras.backend.clear_session()
"""
# 计算代码的行数
code_lines = code.split('\n')
num_lines = len(code_lines)
# 根据代码行数动态调整图像高度
fig_height = max(5, num_lines * 0.18) # 每行代码占用 0.2 英寸,最小高度为 5 英寸
fig, ax = plt.subplots(figsize=(15, fig_height))
# 绘制代码
ax.text(0, 0, code, fontsize=12, ha='left', va='bottom', wrap=True)
ax.axis('off')
plt.tight_layout()
# 保存图片
plt.savefig('code_image.png')
plt.close()
"""
这我代码"D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\Scripts\python.exe" "D:/2025College Student Innovation and Entrepreneurship Project/project/VGG/VGG.py"
显存动态分配成功!
2025-08-12 18:23:05.473429: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-12 18:23:05.593371: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory: -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_layer (InputLayer) [(128, 224, 224, 3)] 0
_________________________________________________________________
conv2d (Conv2D) (128, 224, 224, 64) 1792
_________________________________________________________________
max_pooling2d (MaxPooling2D) (128, 112, 112, 64) 0
_________________________________________________________________
conv2d_1 (Conv2D) (128, 112, 112, 128) 73856
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (128, 56, 56, 128) 0
_________________________________________________________________
conv2d_2 (Conv2D) (128, 56, 56, 256) 295168
_________________________________________________________________
conv2d_3 (Conv2D) (128, 56, 56, 256) 590080
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (128, 28, 28, 256) 0
_________________________________________________________________
conv2d_4 (Conv2D) (128, 28, 28, 512) 1180160
_________________________________________________________________
conv2d_5 (Conv2D) (128, 28, 28, 512) 2359808
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (128, 14, 14, 512) 0
_________________________________________________________________
conv2d_6 (Conv2D) (128, 14, 14, 512) 2359808
_________________________________________________________________
conv2d_7 (Conv2D) (128, 14, 14, 512) 2359808
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (128, 7, 7, 512) 0
_________________________________________________________________
flatten (Flatten) (128, 25088) 0
_________________________________________________________________
dense (Dense) (128, 4096) 102764544
_________________________________________________________________
tf.nn.dropout (TFOpLambda) (128, 4096) 0
_________________________________________________________________
dense_1 (Dense) (128, 4096) 16781312
_________________________________________________________________
tf.nn.dropout_1 (TFOpLambda) (128, 4096) 0
_________________________________________________________________
dense_2 (Dense) (128, 1000) 4097000
=================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
_________________________________________________________________
2025-08-12 18:23:07.011400: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
(128, 224, 224, 3) (128,)
Epoch 1/100
2025-08-12 18:23:08.311120: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8600
7/8 [=========================>....] - ETA: 0s - loss: 12.8838 - accuracy: 0.00222025-08-12 18:23:45.369397: W tensorflow/core/framework/op_kernel.cc:1680] Invalid argument: required broadcastable shapes
Traceback (most recent call last):
File "D:/2025College Student Innovation and Entrepreneurship Project/project/VGG/VGG.py", line 1002, in <module>
callbacks=[reduce_lr] # 添加回调函数
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\keras\engine\training.py", line 1184, in fit
tmp_logs = self.train_function(iterator)
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\tensorflow\python\eager\def_function.py", line 885, in __call__
result = self._call(*args, **kwds)
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\tensorflow\python\eager\def_function.py", line 917, in _call
return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\tensorflow\python\eager\function.py", line 3040, in __call__
filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\tensorflow\python\eager\function.py", line 1964, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\tensorflow\python\eager\function.py", line 596, in call
ctx=ctx)
File "D:\2025College Student Innovation and Entrepreneurship Project\environment\py3.6(tf2.6+cuda11.2+cudnn8.1)\lib\site-packages\tensorflow\python\eager\execute.py", line 60, in quick_execute
inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError: required broadcastable shapes
[[node model/tf.nn.dropout/dropout/Mul_1 (defined at /2025College Student Innovation and Entrepreneurship Project/project/VGG/VGG.py:1002) ]] [Op:__inference_train_function_5416]
Function call stack:
train_function
Process finished with exit code 1
以上是报错,由啥问题
最新发布