拿手写识别来说,按照tensorflow1的思路
__author__ = 'pc'
import os
# Suppress TensorFlow C++ INFO/WARNING logs; must be set BEFORE importing tensorflow.
# Bug fix: the variable name was misspelled 'TF_CPP_MIN_LOG_LEVLE', which TF ignores.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
assert tf.__version__.startswith('2.')
#数据预处理
def preprocess(x, y):
    """Convert one (image, label) pair to tensors.

    x is cast to float32 and scaled from [0, 255] into [0, 1];
    y is cast to int32. Shapes are left unchanged.
    """
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y
if __name__ == '__main__':
    # Download Fashion-MNIST; returned as (x_train, y_train), (x_test, y_test):
    # x (60000, 28, 28), y (60000,), x_test (10000, 28, 28), y_test (10000,)
    (x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
    batch_size = 128
    # from_tensor_slices((x, y)) slices along the first axis and pairs each
    # sample (28, 28) with its label, yielding one (sample, label) element per
    # record — i.e. it zips data and labels together, like [(x1, y1), (x2, y2), ...].
    db = tf.data.Dataset.from_tensor_slices((x, y))
    # map(preprocess): apply the cast/scale transform to every element.
    # shuffle(60000): shuffle buffer covering the whole training set.
    # batch(batch_size): group consecutive elements into batches of batch_size;
    # the final batch may be smaller.
    db = db.map(preprocess).shuffle(60000).batch(batch_size)
    print('db', len(db))  # 60000 samples -> 469 batches
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    db_test = db_test.map(preprocess).batch(batch_size)
    print('db_test', len(db_test))  # 10000 samples -> 79 batches
    db_iter = iter(db)
    sample = next(db_iter)  # peek at one batch
    # Five-layer MLP: 784 -> 256 -> 128 -> 64 -> 32 -> 10 (raw logits, no softmax)
    model = Sequential([
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(10)
    ])
    # Variable batch dimension; each input is a flattened 28*28 image.
    model.build(input_shape=[None, 28 * 28])
    model.summary()  # print the layer structure
    optimizer = optimizers.Adam(learning_rate=1e-3)  # 'lr' alias is deprecated in TF2
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            x = tf.reshape(x, [-1, 28 * 28])
            with tf.GradientTape() as tape:
                y_predict = model(x)  # (batch, 10) logits
                y_true = tf.one_hot(y, depth=10)
                # MSE is computed only for comparison; cross-entropy drives training.
                # With 10 classes, cross-entropy converges while the MSE loss grows —
                # cross-entropy is the appropriate loss for multi-class classification.
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_true, y_predict))
                # categorical_crossentropy expects one-hot labels ([0,0,1], [1,0,0], ...);
                # sparse_categorical_crossentropy would take integer labels (2, 0, 1) instead.
                loss_ce = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(y_true, y_predict, from_logits=True))
            # Backpropagate the cross-entropy loss only.
            grads = tape.gradient(loss_ce, model.trainable_variables)
            # Apply the optimizer-adjusted gradients to the matching variables.
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Evaluate on the test set: count correct predictions and total samples.
        correct_num, all_num = 0, 0
        for x, y in db_test:
            x = tf.reshape(x, [-1, 28 * 28])
            logits = model(x)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # Element-wise comparison -> bool tensor.
            correct = tf.equal(pred, y)
            # bool -> 0/1, then sum to get the number of hits in this batch.
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
            correct_num += int(correct)
            all_num += x.shape[0]
        correct_rate = correct_num / all_num
        if epoch % 6 == 0:
            print(epoch, step, 'loss', float(loss_ce), 'mse', float(loss_mse))
            print("正确率为", correct_rate)
    print(epoch, step, 'loss', float(loss_ce), 'mse', float(loss_mse))
    print("正确率为", correct_rate)
结果:
按照tensorflow2的思路:
用compile()和fit()函数进行封装
__author__ = 'LXY'
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics
tf.random.set_seed(1)
#数据预处理
def preprocess(x, y):
    """Convert one (image, label) pair to network-ready tensors.

    x: cast to float32, scaled into [0, 1], and flattened to a (784,) vector.
    y: cast to int32, then one-hot encoded to a (10,) vector — required because
       the model is compiled with CategoricalCrossentropy (not the sparse variant).
    """
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = tf.reshape(x, [28 * 28])
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y
if __name__ == '__main__':
    batchsz = 128
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    db_train = db_train.map(preprocess).shuffle(60000).batch(batch_size=batchsz)
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    db_test = db_test.map(preprocess).batch(batch_size=batchsz)
    sample = next(iter(db_train))
    # Five-layer MLP: 784 -> 256 -> 128 -> 64 -> 32 -> 10 (raw logits)
    model = Sequential([
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(10)
    ])
    # Build with a variable batch dimension and print the structure.
    model.build(input_shape=[None, 28 * 28])
    model.summary()
    # compile() bundles optimizer / loss / metrics. Passing an optimizer
    # instance (rather than the string 'adam') lets us set the learning rate
    # directly. metrics entries are what fit()/evaluate() will report.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.001),  # 'lr' alias is deprecated in TF2
        loss=tf.losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy', 'mse']
    )
    # NOTE(review): validation_data is the TRAINING set here — almost certainly
    # db_test was intended; kept as-is to match the recorded output below.
    # validation_freq=2: run validation every 2 epochs; verbose=0: no progress bars.
    history = model.fit(db_train, epochs=30, validation_data=db_train,
                        validation_freq=2, verbose=0)
    # history.history maps each compiled metric name to a per-epoch list,
    # e.g. 'loss', 'accuracy', 'mse', plus 'val_*' for the validation runs.
    acc = history.history['accuracy']
    mse = history.history['mse']
    print(history.history, '\nacc', acc, '\nmse', mse)
    # evaluate() needs features AND labels; it returns the metrics configured in
    # compile(), ordered as model.metrics_names: ['loss', 'accuracy', 'mse'].
    # (verbose: 0 = silent, 1 = progress bar, 2 = one line per epoch.)
    index_test = model.evaluate(db_test, verbose=0)
    print("index_test", model.metrics_names, index_test)
    # Bug fix: the original printed index_test[0] (loss) as "acc_test" and
    # index_test[1] (accuracy) as "mse_test". Correct positions are [1] and [2].
    print("acc_test", index_test[1], "\nmse_test", index_test[2])
    # predict() needs only the features and returns the raw model output.
    sample = next(iter(db_test))
    x = sample[0]
    y = sample[1]
    predict = model.predict(x)
    # Both y and predict are (batch, 10); argmax recovers the class index.
    y = tf.argmax(y, axis=1)
    predict = tf.argmax(predict, axis=1)
    result = tf.equal(y, predict)
    result = tf.cast(result, tf.int32)
    num_correct = tf.reduce_sum(result)
    # Fraction of this batch predicted correctly.
    print("正确率", float(num_correct / predict.shape[0]))
结果
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 256) 200960
_________________________________________________________________
dense_1 (Dense) (None, 128) 32896
_________________________________________________________________
dense_2 (Dense) (None, 64) 8256
_________________________________________________________________
dense_3 (Dense) (None, 32) 2080
_________________________________________________________________
dense_4 (Dense) (None, 10) 330
=================================================================
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________
{'loss': [0.28666120767593384, 0.10383852571249008, 0.06886275857686996, 0.0495663657784462, 0.040643416345119476, 0.029195507988333702, 0.02720797248184681, 0.025505756959319115, 0.020112279802560806, 0.018643202260136604, 0.017069770023226738, 0.017507432028651237, 0.011402882635593414, 0.013872605748474598, 0.011677131988108158, 0.0116088492795825, 0.010554376989603043, 0.00914525892585516, 0.010200493969023228, 0.008989917114377022, 0.01119424868375063, 0.008845041505992413, 0.00811140425503254, 0.006730934605002403, 0.008821123279631138, 0.0069463360123336315, 0.006111547816544771, 0.006241519469767809, 0.006629963871091604, 0.007409942802041769], 'accuracy': [0.9153000116348267, 0.968833327293396, 0.9793999791145325, 0.9845333099365234, 0.9866333603858948, 0.991433322429657, 0.9911166429519653, 0.9917333126068115, 0.9935833215713501, 0.9937833547592163, 0.9938499927520752, 0.9942833185195923, 0.9963333606719971, 0.9954166412353516, 0.9960500001907349, 0.9962000250816345, 0.996483325958252, 0.9970333576202393, 0.9968833327293396, 0.9970666766166687, 0.996150016784668, 0.996916651725769, 0.9976833462715149, 0.9980166554450989, 0.9973000288009644, 0.9979666471481323, 0.997783362865448, 0.9981333613395691, 0.9980166554450989, 0.9977666735649109], 'mse': [14.382641792297363, 20.249595642089844, 24.838550567626953, 30.609041213989258, 34.08992385864258, 39.48649215698242, 45.087684631347656, 49.6972541809082, 52.258705139160156, 58.230628967285156, 64.54415130615234, 64.37020874023438, 76.73501586914062, 76.05093383789062, 77.69914245605469, 82.2696304321289, 82.802490234375, 90.14991760253906, 86.53241729736328, 91.28441619873047, 93.02449035644531, 90.74909210205078, 99.11937713623047, 90.3672103881836, 96.59480285644531, 95.16741180419922, 98.20735168457031, 113.21511840820312, 106.63408660888672, 99.79490661621094], 'val_loss': [0.07012375444173813, 0.032868076115846634, 0.020607618615031242, 0.017107658088207245, 0.020325543358922005, 0.009028838016092777, 
0.009236463345587254, 0.011792157776653767, 0.008487668819725513, 0.014387615956366062, 0.008093735203146935, 0.003469302551820874, 0.0029699832666665316, 0.014832036569714546, 0.0038829941768199205], 'val_accuracy': [0.9796333312988281, 0.9904166460037231, 0.9936500191688538, 0.9944499731063843, 0.993399977684021, 0.9971500039100647, 0.9970499873161316, 0.9959666728973389, 0.9972500205039978, 0.9956499934196472, 0.9975166916847229, 0.9988833069801331, 0.9991833567619324, 0.996749997138977, 0.9988499879837036], 'val_mse': [19.037185668945312, 28.722728729248047, 38.05353927612305, 42.44306945800781, 60.40375518798828, 69.1380386352539, 76.05284118652344, 82.06608581542969, 89.50780487060547, 96.0812759399414, 93.12293243408203, 92.8425064086914, 96.4831314086914, 114.92877197265625, 100.26526641845703]}
acc [0.9153000116348267, 0.968833327293396, 0.9793999791145325, 0.9845333099365234, 0.9866333603858948, 0.991433322429657, 0.9911166429519653, 0.9917333126068115, 0.9935833215713501, 0.9937833547592163, 0.9938499927520752, 0.9942833185195923, 0.9963333606719971, 0.9954166412353516, 0.9960500001907349, 0.9962000250816345, 0.996483325958252, 0.9970333576202393, 0.9968833327293396, 0.9970666766166687, 0.996150016784668, 0.996916651725769, 0.9976833462715149, 0.9980166554450989, 0.9973000288009644, 0.9979666471481323, 0.997783362865448, 0.9981333613395691, 0.9980166554450989, 0.9977666735649109]
mse [14.382641792297363, 20.249595642089844, 24.838550567626953, 30.609041213989258, 34.08992385864258, 39.48649215698242, 45.087684631347656, 49.6972541809082, 52.258705139160156, 58.230628967285156, 64.54415130615234, 64.37020874023438, 76.73501586914062, 76.05093383789062, 77.69914245605469, 82.2696304321289, 82.802490234375, 90.14991760253906, 86.53241729736328, 91.28441619873047, 93.02449035644531, 90.74909210205078, 99.11937713623047, 90.3672103881836, 96.59480285644531, 95.16741180419922, 98.20735168457031, 113.21511840820312, 106.63408660888672, 99.79490661621094]
index_test ['loss', 'accuracy', 'mse'] [0.10048948228359222, 0.9819999933242798, 99.79741668701172]
acc_test 0.10048948228359222 (注:此行标签有误,该值实际是 index_test[0] 即测试 loss;测试 accuracy 为 0.9819999933242798)
mse_test 0.9819999933242798 (注:此行标签有误,该值实际是 index_test[1] 即测试 accuracy;测试 mse 为 99.79741668701172)
正确率 0.9921875