目录
1.1问题描述:
1.2预期解决方案:
2.2读取图像数据,按照7:3的比例划分训练集和测试集并转化为tensor的float16
2.3读取标签数据,按照7:3的比例划分训练集和测试集并转化为tensor的float16
1.1问题描述:
杂草是农业经营中不受欢迎的入侵者,它们通过窃取营养、水、土地和其他关键资源来破坏种植,这些入侵者会导致产量下降和资源部署效率低下。一种已知的方法是使用杀虫剂来清除杂草,但杀虫剂会给人类带来健康风险。我们的目标是利用计算机视觉技术可以自动检测杂草的存在,开发一种只在杂草上而不是在作物上喷洒农药的系统,并使用针对性的修复技术将其从田地中清除,从而最小化杂草对环境的负面影响。
1.2预期解决方案:
我们期待您将其部署到模拟的生产环境中——这里推理时间和二分类准确度(F1分数)将作为评分的主要依据。
1.3数据集
https://filerepo.idzcn.com/dataset/assignment_2.zip
1.4图像展示
作物
杂草
标签(labels):
作物(crop):0 0.480469 0.494141 0.957031 0.808594
杂草(weed):1 0.508789 0.489258 0.869141 0.861328
2:数据预处理
2.1将文件名写入data.txt中
# Write the basename (shared prefix) of every sample into data.txt so the
# later loading stages can open the matching .jpeg / .txt pair by prefix.
image_folder = '../weeding/data'
# Match only the image files. The original also matched '.txt' label files,
# which wrote every prefix into data.txt TWICE and duplicated each sample
# in the datasets built below.
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpeg')]
with open('../weeding/data.txt', 'w') as file:
    for filename in image_files:
        # Strip the extension to get the image/label shared prefix.
        prefix = os.path.splitext(filename)[0]
        file.write(prefix + '\n')
2.2读取图像数据,按照7:3的比例划分训练集和测试集并转化为tensor的float16
# Image pipeline: PIL -> tensor, random contrast jitter, then normalise to
# roughly [-1, 1] (single channel, because images are converted with 'L').
transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.ColorJitter(contrast=0.5),        # contrast augmentation
    transforms.Normalize(mean=[0.5], std=[0.5])  # one-channel normalisation
])
train_images_tensor = []
with open(r'../weeding/data.txt', 'r') as f:
    file_name_url = [line.rstrip('\n') for line in f]
for name in file_name_url:
    # 'with' closes each image file; the original leaked one handle per image.
    with Image.open('../weeding/data/' + name + '.jpeg') as image:
        tensor = transformer(image.convert('L')).type(torch.float16)
    train_images_tensor.append(tensor)
# Positional 70/30 split in data.txt order (no shuffling before the split —
# NOTE(review): a random split would be safer if the listing is ordered by
# class). The `idx <= n*0.7` boundary is kept exactly as in the label split
# below so image/label pairs stay aligned.
cut = len(train_images_tensor) * 0.7
image_train = [t for idx, t in enumerate(train_images_tensor) if idx <= cut]
image_test = [t for idx, t in enumerate(train_images_tensor) if idx > cut]
2.3读取标签数据,按照7:3的比例划分训练集和测试集并转化为tensor的float16
# Label pipeline. Each prefix has a YOLO-style .txt whose first character on
# line 1 is the class id ('0' = crop, '1' = weed).
transformerlab = transforms.Compose([
    transforms.ToTensor()
])
train_lables_tensor = []
with open(r'../weeding/data.txt', 'r') as f:
    file_name_url = [line.rstrip('\n') for line in f]
for name in file_name_url:
    # 'with' closes each label file; the original never closed them.
    with open('../weeding/data/' + name + '.txt') as label_file:
        # First character of the first line is the class index.
        labels = float(label_file.readline()[0])
    train_lables_tensor.append(torch.tensor(labels, dtype=torch.float16))
# Positional 70/30 split with the SAME `idx <= n*0.7` boundary as the image
# split above, so image/label pairs stay aligned.
cut = len(train_lables_tensor) * 0.7
lables_train = [t for idx, t in enumerate(train_lables_tensor) if idx <= cut]
lables_test = [t for idx, t in enumerate(train_lables_tensor) if idx > cut]
2.4做数据集
# Stack the per-sample tensors into single batched tensors, then wrap them
# as TensorDatasets served by shuffling DataLoaders (batch size 32).
train_datas_tensor = torch.stack(image_train)
test_datas_tensor = torch.stack(image_test)
train_labels_tensor = torch.stack(lables_train)
test_labels_tensor = torch.stack(lables_test)
# NOTE: labels are element 0 and images element 1 of every dataset item;
# the training and evaluation loops below unpack them in that order.
train_dataset = TensorDataset(train_labels_tensor, train_datas_tensor)
test_dataset = TensorDataset(test_labels_tensor, test_datas_tensor)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)
3:ResNet网络
3.1自写ResNet网络
class Residual(nn.Module):
    """Basic ResNet residual block: two 3x3 convs with batch norm, plus an
    optional 1x1 projection on the skip path for when the channel count or
    spatial size changes (use_conv=True)."""

    def __init__(self, input_channels, num_channels, use_conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        # 1x1 conv aligns the shortcut's channels/stride with the main path.
        self.conv3 = (nn.Conv2d(input_channels, num_channels,
                                kernel_size=1, stride=strides)
                      if use_conv else None)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        # Main path: conv-bn-relu, conv-bn.
        out = self.bn2(self.conv2(F.relu(self.bn1(self.conv1(X)))))
        # Shortcut path, projected only when conv3 exists.
        shortcut = X if self.conv3 is None else self.conv3(X)
        return F.relu(out + shortcut)
# Stem: 7x7 conv halves the resolution, then a 3x3 max-pool halves it again.
# NOTE(review): this expects 3-channel input, but the preprocessing above
# converts images to single-channel grayscale; this hand-built net appears
# to be superseded by the resnet50 defined later — confirm which is used.
b1 = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    """Return a list of `num_residuals` Residual blocks forming one stage.

    Unless this is the first stage (which follows the stem's max-pool), the
    leading block halves the spatial size (stride 2) and projects the channel
    count with a 1x1 conv; the remaining blocks keep shape unchanged.
    """
    blocks = []
    for idx in range(num_residuals):
        downsample = idx == 0 and not first_block
        if downsample:
            blocks.append(Residual(input_channels, num_channels,
                                   use_conv=True, strides=2))
        else:
            blocks.append(Residual(num_channels, num_channels))
    return blocks
# Four residual stages (64 -> 128 -> 256 -> 512 channels); every stage after
# the first halves the spatial resolution.
b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))
# Global average pool + linear head.
# NOTE(review): the head outputs 10 classes although the task is binary;
# this hand-built net is replaced by the resnet50 assigned to `net` below.
net = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 10),
)
3.2通过导包的方式直接使用ResNet50(这里使用了OneAPI的组件进行加速)
# Replace the hand-built net with a pretrained ResNet-50.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision in
# favour of `weights=ResNet50_Weights.DEFAULT` — confirm the installed version.
net = torchvision.models.resnet50(pretrained=True)
# Swap the stem conv for a 1-channel version to match the grayscale input;
# this discards the pretrained first-layer weights (new conv is randomly
# initialised).
net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# Replace the 1000-class head with a binary (crop/weed) classifier.
num_features = net.fc.in_features
net.fc = nn.Linear(num_features, 2)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device).float()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
4:训练
4.1进行训练,以交叉熵作为损失函数(迭代19次)
# Train for 19 epochs with cross-entropy loss (the surrounding text's "MSE"
# note was wrong — labels are integer class indices and the criterion is
# nn.CrossEntropyLoss). The original `range(1, 19)` only ran 18 epochs and
# the progress bar hard-coded '/10'.
num_epochs = 19
for epoch in range(1, num_epochs + 1):
    running_loss = 0.0
    num_images = 0
    loop = tqdm(enumerate(train_dataloader, 0))
    for step, data in loop:
        # Dataset items are (label, image). Move both to the model's device;
        # the original forced them onto 'cpu' even when `net` was on CUDA,
        # which would crash with a device mismatch.
        labels, inputs = data[0].to(device).float(), data[1].to(device).float()
        optimizer.zero_grad()
        outputs = net(inputs)
        # CrossEntropyLoss expects integer class targets.
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
        num_images += inputs.size(0)
        running_loss += loss.item()
        loop.set_description(f'Epoch [{epoch}/{num_epochs}]')
        loop.set_postfix(loss=running_loss / (step + 1))
print('Finish!!!')
4.2查看test数据集F1分数及时间
# Evaluate on the held-out set: accuracy, binary F1 score and wall-clock
# inference time.
net = net.float()   # hoisted out of the loop (was re-applied every batch)
net.eval()          # BatchNorm must use running stats at inference time;
                    # the original left the net in training mode here.
correct = 0
total = 0
all_predictions = []
all_labels = []
start_time = time.time()
with torch.no_grad():
    for data in test_dataloader:
        # Dataset items are (label, image); keep them on the model's device.
        images = data[1].to(device).float()
        labels = data[0].to(device).long()
        outputs = net(images)
        # Predicted class = argmax over the two logits.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
accuracy = 100 * correct / total
inference_time = time.time() - start_time
f1 = f1_score(all_labels, all_predictions, average='binary')
print(f'F1分数为: {f1:.4f}')
print(f'Accuracy on test set: {accuracy:.2f}%')
print(f"Inference Time: {inference_time} seconds")
五:OneAPI组件的使用
Intel Optimization for PyTorch: PyTorch优化套件支持自动混合精度,这有助于减少模型的内存占用,提高计算性能。
Intel Extension for PyTorch :提供了专门针对英特尔 CPU 和加速器的硬件优化,以充分利用英特尔处理器的性能。这些优化可以显著提高深度学习模型的推理和训练性能。
六:总结
努力将其改成GPU运行但是一直说没有找到驱动程序,努力无果后选择CPU。训练19次,耗时5小时,非常慢,但最后的结果比较好。