来源:投稿 作者:LSC
编辑:学姐
比赛网址
https://xihe.mindspore.cn/competition/3/0/introduction
赛题任务
图像分类是计算机视觉中最基础的任务,目前图像分类的算法仍然在飞速发展。本赛题旨在让参赛者熟悉MindSpore并锻炼参赛者使用MindSpore进行图像分类预处理、图像分类的能力。 同时为了考察参赛者应对大量数据的处理能力,本赛题采用Caltech多类别图像数据集。
注意事项
本次比赛只能使用基于昇思MindSpore框架实现的深度神经网络模型
MindSpore深度学习框架是华为开发的,总体类似于TensorFlow 2.0和PyTorch,不过目前体验不太好,生态也不成熟,目前还不如国内百度的PaddlePaddle
我是在autodl平台上运行代码的,要先把数据集下载上传到平台上。由于autodl没有mindspore框架的镜像,所以要自己从0安装,参考mindspore官网的安装教程:https://gitee.com/mindspore/mindspore
Baseline代码如下
首先是安装环境
!git clone https://gitee.com/mindspore/mindspore.git
!pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/1.8.1/MindSpore/gpu/x86_64/cuda-11.1/mindspore_gpu-1.8.1-cp38-cp38-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple
!pip install mindvision
解压数据集
!unzip autodl-nas/caltech_for_user.zip -d autodl-tmp/
查看256个类别的图像总数和每个类别的图像的数量
import os

# Count how many images each of the class folders holds, plus the grand total.
train_path = "/root/autodl-tmp/caltech_for_user/train/"
num = {cls: len(os.listdir(train_path + cls)) for cls in os.listdir(train_path)}
total = sum(num.values())
print(total)
print(num)
对数据集进行划分
import os
import shutil

# Sanity check: number of files in the held-out test split.
print(len(os.listdir("/root/autodl-tmp/caltech_for_user/test")))

train_dir = "/root/autodl-tmp/caltech_for_user/train/"
val_dir = "/root/autodl-tmp/caltech_for_user/val/"

# Record the absolute path of every training image, one per line, so the
# train/val split can be done on this flat list afterwards.
with open("/root/autodl-tmp/total.txt", "w", encoding="utf-8") as f:
    for class_name in os.listdir(train_dir):
        class_dir = train_dir + class_name
        for img_name in os.listdir(class_dir):
            f.write(class_dir + "/" + img_name + "\n")
from sklearn.utils import shuffle

# Deterministically shuffle the full path list (fixed random_state so the
# split is reproducible), then cut 75% / 25% into train / val lists.
with open("/root/autodl-tmp/total.txt", "r", encoding="utf-8") as f:
    total = f.readlines()
total = shuffle(total, random_state=100)

ratio = 0.75
train_len = int(ratio * len(total))
train, val = total[:train_len], total[train_len:]

with open("/root/autodl-tmp/train.txt", "w", encoding="utf-8") as f1:
    f1.writelines(train)
with open("/root/autodl-tmp/val.txt", "w", encoding="utf-8") as f2:
    f2.writelines(val)
import shutil

# Physically move every image listed in val.txt out of train/<class>/ into a
# parallel val/<class>/ tree, so ImageFolderDataset can read the two splits.
with open("/root/autodl-tmp/val.txt", "r", encoding="utf-8") as f:
    for line in f:
        src = line.strip()
        parts = src.split("/")
        parts[-3] = "val"  # .../train/<class>/<img> -> .../val/<class>/<img>
        dest_dir = "/".join(parts[:-1])
        # BUGFIX: os.mkdir raised FileNotFoundError when the top-level "val"
        # directory did not exist yet, and FileExistsError on repeated runs;
        # makedirs with exist_ok=True handles both.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.move(src, "/".join(parts))
from mindvision.dataset import ImageNet
import mindspore as ms
from mindvision.classification.dataset import ImageNet
import mindspore.context as context
ms.set_context(mode=ms.GRAPH_MODE, device_target='GPU')
import mindspore.dataset as ds
import mindspore.dataset.vision as vision
from mindvision.engine.callback import ValAccMonitor
import mindspore.nn as nn
import numpy as np
读取数据集
# Map the class folder names "1".."256" to contiguous 0-based label ids.
class_indexing = {str(label + 1): label for label in range(256)}
# Build the folder-per-class image datasets; decode=True yields decoded HWC
# uint8 images rather than raw encoded bytes.
train_dir = "/root/autodl-tmp/caltech_for_user/train/"
dataset_train = ds.ImageFolderDataset(train_dir, decode=True, class_indexing=class_indexing)
val_dir = "/root/autodl-tmp/caltech_for_user/val/"
dataset_val = ds.ImageFolderDataset(val_dir, decode=True, class_indexing=class_indexing)
数据增强
# ---- Data augmentation / preprocessing ----
image_size = 224
batch_size = 16
# Pixel-domain normalization constants: (x - 127.5) / 127.5 maps pixels to
# roughly [-1, 1].
mean = [0.5 * 255] * 3
std = [0.5 * 255] * 3
# Training-time augmentations, applied per sample before batching.
train_trans = [
vision.RandomColorAdjust(),
vision.RandomVerticalFlip(),
vision.RandomHorizontalFlip(),
vision.CutOut(10),
vision.GaussianBlur(5),
# NOTE(review): MixUp normally operates on a *batched* dataset with one-hot
# labels; placing it in this per-sample, pre-batch map looks wrong — confirm
# against the MindSpore dataset API before relying on it.
vision.MixUp(batch_size=16, alpha=0.1),
vision.Resize((image_size, image_size)),
# vision.CenterCrop(),
vision.Normalize(mean=mean, std=std),
vision.HWC2CHW()
]
# Validation uses only deterministic resize + normalize.
val_trans = [
vision.Resize((image_size, image_size)),
vision.Normalize(mean=mean, std=std),
vision.HWC2CHW()
]
# drop_remainder=True keeps every batch exactly batch_size, which gives the
# fixed shapes GRAPH_MODE expects.
dataset_train = dataset_train.map(operations=train_trans, num_parallel_workers=1)
dataset_train = dataset_train.batch(batch_size, drop_remainder = True)
dataset_val = dataset_val.map(operations=val_trans, num_parallel_workers=1)
dataset_val = dataset_val.batch(batch_size, drop_remainder = True)
# Peek at one validation batch to sanity-check the pipeline's types and shapes.
sample = next(dataset_val.create_dict_iterator())
images = sample['image']
print(type(images))
print(type(images[0]))
print(len(images))
print(images[0].shape)
# Define the optimizer and loss function.
# NOTE(review): dead/broken cell — `net` is only defined further below, so this
# AdamWeightDecay optimizer raises NameError if the file is run top-to-bottom;
# it is superseded by the Momentum optimizer defined after the model.
opt = nn.AdamWeightDecay(params=net.trainable_params(), learning_rate=0.001, weight_decay=0.01)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
# ---- Define the model ----
import mindspore.nn as nn
from mindvision.classification.models import mobilenet_v2
import mindspore as ms

# BUGFIX: the original called resnet50() while only mobilenet_v2 was imported
# (the resnet50 import was commented out), raising NameError. The head
# replacement below (Conv2d classifier with in_channels=1280) matches the
# mobilenet_v2 head, so mobilenet_v2 is the intended backbone.
net = mobilenet_v2(pretrained=True)
num_classes = 256
# Swap the 1000-class ImageNet classifier for a 256-class one.
net.head.classifier = nn.Conv2d(in_channels=1280, out_channels=num_classes, kernel_size=(1, 1), stride=(1, 1))

# Optimizer and loss function (small LR for fine-tuning a pretrained backbone).
opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.0001, momentum=0.9)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

# High-level Model wrapper with an accuracy metric for validation monitoring.
model = ms.Model(net, loss, opt, metrics={"Accuracy": nn.Accuracy()})
from mindvision.engine.callback import ValAccMonitor
import mindspore as ms

# First training stage: ValAccMonitor tracks validation accuracy (and keeps
# the best checkpoint); TimeMonitor reports per-epoch timing.
num_epochs = 50
callbacks = [ValAccMonitor(model, dataset_val, num_epochs), ms.TimeMonitor()]
model.train(num_epochs, dataset_train, callbacks=callbacks)
# Reload the best checkpoint from stage 1 and continue training.
param_dict = ms.load_checkpoint("/root/autodl-tmp/resnet50/best.ckpt")
ms.load_param_into_net(net, param_dict)

# Fresh optimizer and loss for the second stage.
# NOTE(review): the original comment said "lower the learning rate", but
# 0.0001 is the same LR as stage 1 — confirm the intended value.
opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.0001, momentum=0.9)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

# BUGFIX: the original redefined opt/loss but kept training the old `model`,
# which still held the stage-1 optimizer, so the new settings had no effect.
# Rebuild the Model so the new optimizer is actually used.
model = ms.Model(net, loss, opt, metrics={"Accuracy": nn.Accuracy()})

from mindvision.engine.callback import ValAccMonitor
import mindspore as ms

num_epochs = 50
model.train(num_epochs,
            dataset_train,
            callbacks=[ValAccMonitor(model, dataset_val, num_epochs), ms.TimeMonitor()])
import numpy as np
from PIL import Image
from mindspore import Tensor

def infer_img(path):
    """Classify one image file; return the 0-based argmax class index."""
    img = Image.open(path).convert("RGB").resize((224, 224))
    # Same normalization as the training pipeline: (x - 127.5) / 127.5.
    mean = np.array([0.5 * 255] * 3)
    std = np.array([0.5 * 255] * 3)
    arr = ((np.array(img) - mean) / std).astype(np.float32)
    # HWC -> CHW, then add the leading batch dimension expected by the model.
    batch = np.expand_dims(np.transpose(arr, (2, 0, 1)), axis=0)
    logits = model.predict(Tensor(batch))
    return np.argmax(logits)
# Evaluate on the validation split.
# NOTE(review): earlier cells wrote val.txt (with pre-move train paths);
# val1.txt is presumably a regenerated list of the moved val files — confirm.
y_true = []
y_pred = []
with open("/root/autodl-tmp/val1.txt", "r", encoding="utf-8") as f:
    for line in f:
        p = line.strip()
        if not p:  # skip a possible trailing blank line
            continue
        # Model labels are 0-based; folder names are 1-based, hence the +1.
        r = infer_img(p) + 1
        # Path layout /root/autodl-tmp/caltech_for_user/val/<class>/<img>:
        # split("/")[5] is the class-id folder name.
        y_true.append(int(p.split("/")[5]))
        y_pred.append(r)
# BUGFIX: the original collected y_true/y_pred but never used them; report
# validation accuracy so the run produces a number.
correct = sum(1 for t, pr in zip(y_true, y_pred) if t == pr)
print("val accuracy:", correct / len(y_true) if y_true else 0.0)
import os

# Inference images are named 0.jpg .. 5119.jpg; build the list in numeric
# order so predictions line up with the expected submission order.
test_path = "/root/autodl-tmp/caltech_for_user/infer/"
test = [test_path + str(idx) + ".jpg" for idx in range(5120)]

# Predict every test image (converting back to 1-based labels) while logging
# each processed path.
y_pred = []
with open("/root/autodl-tmp/test.txt", "w", encoding="utf-8") as log_f:
    for p in test:
        y_pred.append(infer_img(p) + 1)
        log_f.write(str(p) + "\n")

# Write one predicted label per line as the submission file.
with open("/root/autodl-tmp/ans.txt", "w", encoding="utf-8") as ans_f:
    for label in y_pred:
        ans_f.write(str(label) + "\n")
总的来说,这是很通常的做法,但是无论换什么模型,准确率最高都只能是85%左右,提交上去只有81.7%,后续复赛需要对数据进行清洗,使用更多的数据增强和模型融合等进行优化。
资料获取方式🚀🚀🚀
关注下方《学姐带你玩AI》发送“我会了”领取20+场比赛资料
码字不易,欢迎大家点赞评论收藏!