Kaggle CIFAR-10 notes

13.13. Hands-on Kaggle Competition: Image Classification (CIFAR-10) — Dive into Deep Learning 2.0.0 documentation

Unfinished; saving a draft for now.

Reading the data files

A look at the folder structure makes this part clear; see the layout sketch below.
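Per the d2l chapter, the full competition data unpacks into roughly the following layout (the tiny demo archive used here mirrors it on a much smaller scale):

../data/cifar-10/train/[1-50000].png
../data/cifar-10/test/[1-300000].png
../data/cifar-10/trainLabels.csv
../data/cifar-10/sampleSubmission.csv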

import collections
import math
import os
import shutil
import pandas as pd
import torch
import torchvision
from torch import nn
from d2l import torch as d2l


#@save
d2l.DATA_HUB['cifar10_tiny'] = (d2l.DATA_URL + 'kaggle_cifar10_tiny.zip','2068874e4b9a9f0fb07ebe0ad2b29754449ccacd')

# If using the full dataset from the Kaggle competition, set demo to False
demo = True

if demo:  # extracted path: '..\\data\\kaggle_cifar10_tiny'
    data_dir = d2l.download_extract('cifar10_tiny')
else:
    data_dir = '../data/cifar-10/'
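
A quick sanity check (a minimal sketch; the exact contents depend on which archive was downloaded) is to list what ended up in data_dir:

print(data_dir)
print(os.listdir(data_dir))  # expect entries like 'train', 'test', 'trainLabels.csv'
print('# images in train/:', len(os.listdir(os.path.join(data_dir, 'train'))))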

#@save
def read_csv_labels(fname):
    """Read fname and return a dict mapping filename to label"""
    with open(fname, 'r') as f:
        # Skip the header line (column names)
        lines = f.readlines()[1:]
    tokens = [l.rstrip().split(',') for l in lines]  # rstrip strips the trailing newline/whitespace
    return dict(((name, label) for name, label in tokens))

labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
# labels looks like {'1': 'frog', '2': 'truck', '3': 'truck', '4': 'deer', ...
print('# training examples:', len(labels))
print('# classes:', len(set(labels.values())))
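
reorg_train_valid below uses collections.Counter to find the class with the fewest examples; a small sketch (reusing the labels dict just read and the imports above) to look at the whole class distribution first:

counter = collections.Counter(labels.values())
print(counter.most_common())      # (class, count) pairs, most frequent first
print(counter.most_common()[-1])  # the rarest class and its count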



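Note: reorg_train_valid and reorg_test below call a copyfile helper that the d2l chapter defines but these notes had not copied over; it creates the target directory if needed and copies the file into it:

#@save
def copyfile(filename, target_dir):
    """Copy a file into a target directory"""
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(filename, target_dir)
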
#@save
def reorg_train_valid(data_dir, labels, valid_ratio):
    """Split the validation set out of the original training set"""
    # Number of examples in the class with the fewest training examples
    n = collections.Counter(labels.values()).most_common()[-1][1]
    # Number of examples per class in the validation set
    n_valid_per_label = max(1, math.floor(n * valid_ratio))
    label_count = {}
    for train_file in os.listdir(os.path.join(data_dir, 'train')):
        label = labels[train_file.split('.')[0]]
        fname = os.path.join(data_dir, 'train', train_file)
        # Every example goes into the combined train_valid folder
        copyfile(fname, os.path.join(data_dir, 'train_valid_test',
                                     'train_valid', label))
        if label not in label_count or label_count[label] < n_valid_per_label:
            # Fill the validation folder until each class has n_valid_per_label examples
            copyfile(fname, os.path.join(data_dir, 'train_valid_test',
                                         'valid', label))
            label_count[label] = label_count.get(label, 0) + 1
        else:
            # The remaining examples go into the training folder
            copyfile(fname, os.path.join(data_dir, 'train_valid_test',
                                         'train', label))
    return n_valid_per_label


#@save
def reorg_test(data_dir):
    """在预测期间整理测试集,以方便读取"""
    for test_file in os.listdir(os.path.join(data_dir, 'test')):
        copyfile(os.path.join(data_dir, 'test', test_file),
                 os.path.join(data_dir, 'train_valid_test', 'test',
                              'unknown'))


def reorg_cifar10_data(data_dir, valid_ratio):
    labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
    reorg_train_valid(data_dir, labels, valid_ratio)
    reorg_test(data_dir)


batch_size = 32 if demo else 128
valid_ratio = 0.1
reorg_cifar10_data(data_dir, valid_ratio)
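
After the call above, all images live under data_dir/train_valid_test/ in the four splits 'train', 'valid', 'train_valid' and 'test', each containing one subfolder per class label. A minimal sketch to verify the reorganization (assumes the call above has finished):

for split in ('train', 'valid', 'train_valid', 'test'):
    split_dir = os.path.join(data_dir, 'train_valid_test', split)
    # Count all files under the per-class subfolders of this split
    n_files = sum(len(files) for _, _, files in os.walk(split_dir))
    print(split, ':', n_files, 'images')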
