# CIFAR-10 | Kaggle | TensorFlow
# Imports
# Standard-library imports.
import os
import sys
import time

# Third-party imports.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras

# NOTE(review): '%matplotlib inline' is an IPython/Jupyter magic and is a
# SyntaxError in a plain Python script, so it is left commented out here.
# %matplotlib inline

# Report interpreter and library versions for reproducibility.
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
# Load the data
# The ten CIFAR-10 categories, listed in label-index order.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Kaggle dataset locations: label CSV files and image directories.
_DATA_ROOT = "../input/cifar10-object-recognition-in-images-zip-file"
train_lables_file = _DATA_ROOT + "/trainLabels.csv"
test_csv_file = _DATA_ROOT + "/sampleSubmission.csv"
train_folder = _DATA_ROOT + "/train_test/train/train/"
test_folder = _DATA_ROOT + "/train_test/test/test/"
def parse_csv_file(filepath, folder):
    """Parse a Kaggle label CSV into (image_path, label) pairs.

    Args:
        filepath: Path to a CSV whose data rows are "<image_id>,<label>",
            preceded by a single header line.
        folder: Directory containing the "<image_id>.png" image files.

    Returns:
        A list of (full_image_path, label_string) tuples, one per data row.
    """
    results = []
    with open(filepath, 'r') as f:
        # [1:] skips the header line.
        for line in f.readlines()[1:]:
            line = line.strip('\n')
            if not line:
                continue  # tolerate trailing blank lines
            # maxsplit=1 keeps the label intact even if it contains commas.
            image_id, label_str = line.split(',', 1)
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results
# Build (image_path, label) records for the training and test sets.
train_label_info = parse_csv_file(train_lables_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Peek at a few parsed records and report how many there are.
import pprint
pprint.pprint(train_label_info[:5])
pprint.pprint(test_csv_info[:5])
print(len(train_label_info), len(test_csv_info))
# Read the data through DataFrame-backed generators
# Input geometry and training hyper-parameters.
height, width, channels = 32, 32, 3
batch_size = 32
num_classes = 10

# Training-time augmentation: rescale pixel values to [0, 1] and apply
# random rotations, shifts, shears, zooms and horizontal flips.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
# flow_from_dataframe needs a DataFrame, but none was ever built from the
# parsed records (the original referenced an undefined `train_df`).
# Reconstruct it here, holding out the last 5000 rows for validation.
train_df = pd.DataFrame(train_label_info[:45000],
                        columns=['filepath', 'class'])

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=True,
    # Fixed typo: was 'clss_mode'. 'sparse' yields integer labels, which
    # matches the sparse_categorical_crossentropy loss used for training.
    class_mode='sparse')
# Validation pipeline: rescale only, no augmentation.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

# flow_from_dataframe needs a DataFrame, but the original referenced an
# undefined `valid_df`. Reconstruct it as the last 5000 labelled rows.
valid_df = pd.DataFrame(train_label_info[45000:],
                        columns=['filepath', 'class'])

valid_generator = valid_datagen.flow_from_dataframe(
    valid_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=True,  # shuffling validation data is harmless but unnecessary
    # Fixed typo: was 'clss_mode'. 'sparse' yields integer labels, matching
    # the sparse_categorical_crossentropy loss.
    class_mode='sparse')

# Sample counts drive steps_per_epoch / validation_steps during training.
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num, valid_num)
# Build the model
# VGG-style convolutional stack: three stages of
# [Conv-BN-Conv-BN-MaxPool] with 128, 256 and 512 filters respectively,
# followed by a dense classification head.
conv_layers = []
for n_filters in (128, 256, 512):
    # Only the very first layer declares the input shape.
    first_kwargs = (
        {'input_shape': [width, height, channels]} if not conv_layers else {}
    )
    conv_layers.append(keras.layers.Conv2D(
        filters=n_filters, kernel_size=3, padding='same',
        activation='relu', **first_kwargs))
    conv_layers.append(keras.layers.BatchNormalization())
    conv_layers.append(keras.layers.Conv2D(
        filters=n_filters, kernel_size=3, padding='same',
        activation='relu'))
    conv_layers.append(keras.layers.BatchNormalization())
    conv_layers.append(keras.layers.MaxPool2D(pool_size=2))

model = keras.models.Sequential(conv_layers + [
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])

# Sparse loss: labels arrive as integer class indices from the generators.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
model.summary()
# Train the model
# Number of passes over the training data.
epochs = 10

# Model.fit accepts generators directly in TF2; fit_generator is
# deprecated (and removed in recent TensorFlow releases).
history = model.fit(
    train_generator,
    steps_per_epoch=train_num // batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=valid_num // batch_size)
# Visualization