tensorflow
参考tensorflow 官网教程
文章目录
1.手写数字识别
1.导入tensorflow模块
import tensorflow as tf

# 2./3. Download the MNIST handwritten-digit dataset and load it into memory.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 4. Scale the pixel values into [0, 1] to speed up training.
x_train, x_test = x_train / 255.0, x_test / 255.0

# 5. Stack the layers into a tf.keras.Sequential model.
model = tf.keras.models.Sequential([
    # 5.1 Flatten each 28x28 image into a 784-element vector.
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    # BUG FIX: the original layer was Dense(9999, activation='sigmoid',
    # input_dim=99999) — input_dim contradicts the 784-wide Flatten output,
    # and 9999 sigmoid units is nonsense sizing for MNIST. Use the standard
    # 128-unit ReLU hidden layer from the official tutorial instead.
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax')
])

# 6. Choose the optimizer and loss function for training.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())

# 7. Train, then evaluate on the held-out test set.
model.fit(x_train, y_train, epochs=3)
model.evaluate(x_test, y_test, verbose=2)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-MKfuSSRV-1616034006887)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107155754932.png)]
可以看到随着训练进行模型的损失越来越小,准确率越来越高
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-TghdNwB4-1616034006889)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107160330379.png)]
2.利用Fashion MNIST 对服装图像进行分类
# 1. Import TensorFlow, Keras, and helper modules.
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# 2./3. Download the Fashion-MNIST dataset and load it into memory.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# 4. Human-readable names for the integer labels 0-9.
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# 5. Inspect one sample image (index 10) before preprocessing.
# FIX: the original comment claimed this shows the *first* image, but the
# code displays train_images[10]; the comment now matches the code.
plt.figure()
plt.imshow(train_images[10])
plt.colorbar()
plt.grid(True)
plt.show()

# 5.1 Scale the pixel values into [0, 1].
train_images = train_images / 255.0
test_images = test_images / 255.0

# 6. Verify the data: show the first 100 images in a 10x10 grid with their
# class names. FIX: the original comment said "first 25" but plotted 100.
plt.figure(figsize=(15, 15))
for i in range(100):
    plt.subplot(10, 10, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[train_labels[i]])
plt.show()

# 7. Build the model: Flatten -> Dense(128, relu) -> 10 raw logits.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10)
])

# 8. Optimizer and loss; from_logits=True because the last layer is linear.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# 9. Train and evaluate. BUG FIX: epochs was 99 — far beyond the 10 epochs
# the official tutorial uses, and enough to heavily overfit this small model.
model.fit(train_images, train_labels, epochs=10)
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-nvyK16hS-1616034006890)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107174059698.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-JDK6i71I-1616034006892)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107174109286.png)]
3.IMDB 电影评论文本分类
import tensorflow as tf
from tensorflow import keras
import numpy as np

# Download the IMDB review dataset, keeping the 10,000 most frequent words.
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
print("Training entries: {}, labels: {}".format(len(train_data), len(train_labels)))

# Dictionary mapping words to integer indices.
word_index = imdb.get_word_index()
# Shift every index by 3 to reserve the first slots for special tokens.
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])


def decode_review(text):
    """Convert a sequence of word indices back into a readable string."""
    # FIX: the function body was flattened to column 0 in the original text,
    # which is a syntax error; formatting restored, logic unchanged.
    return ' '.join([reverse_word_index.get(i, '?') for i in text])


# Pad every review to exactly 256 tokens (post-padding with <PAD>).
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=word_index["<PAD>"],
                                                        padding='post',
                                                        maxlen=256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=word_index["<PAD>"],
                                                       padding='post',
                                                       maxlen=256)

# Input dimension is the vocabulary size (10,000 words).
vocab_size = 10000
model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.summary()

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Hold out the first 10,000 training examples as a validation set.
x_val = train_data[:10000]
partial_x_train = train_data[10000:]
y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]

history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=40,
                    batch_size=512,
                    validation_data=(x_val, y_val),
                    verbose=1)

results = model.evaluate(test_data, test_labels, verbose=2)
print(results)
4.利用cnn进行人脸识别
4.1 获取人脸图片
import random
import numpy as np
import cv2
import os
IMGSIZE = 64  # edge length (pixels) of every saved face crop


# Create each requested directory tree, skipping ones that already exist.
def createdir(*args):
    """Create every path in *args (including parent dirs) if it is missing."""
    for path in args:
        if not os.path.exists(path):
            os.makedirs(path)
def relight(imgsrc, alpha=1, bias=0):
    """Return imgsrc scaled by alpha and shifted by bias, clipped to uint8.

    Used for brightness/contrast augmentation of face crops.
    """
    adjusted = imgsrc.astype(float) * alpha + bias
    # Clamp into the valid pixel range before converting back to uint8.
    return np.clip(adjusted, 0, 255).astype(np.uint8)
def getfacefromcamera(outdir):
    """Capture 200 face crops (IMGSIZE x IMGSIZE) from the webcam into outdir.

    Each detected face is cropped, resized, randomly re-lit for augmentation,
    and saved as <n>.jpg under outdir. Press ESC to stop early.
    """
    createdir(outdir)
    camera = cv2.VideoCapture(0)
    # NOTE(review): hard-coded absolute path to the Haar cascade — confirm it
    # exists on the target machine, or load via cv2.data.haarcascades instead.
    haar = cv2.CascadeClassifier('E:/QQ/1833654528/FileRecv/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml')
    n = 1
    while 1:
        # Collect 200 images of size 64*64.
        if n <= 200:
            print('It`s processing %s image.' % n)
            success, img = camera.read()
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = haar.detectMultiScale(gray_img, 1.3, 5)
            # f_x, f_y: top-left corner of the face; f_w, f_h: its width and
            # height (image origin (0, 0) is the top-left corner).
            for f_x, f_y, f_w, f_h in faces:
                # Crop the face region — rows (y) first, then columns (x).
                face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                # Resize the crop to 64*64.
                face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                # Randomly vary brightness/contrast to diversify the data.
                face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
                # Save the crop.
                cv2.imwrite(os.path.join(outdir, str(n)+'.jpg'), face)
                # Draw the label 20 px above the face on the preview frame.
                cv2.putText(img, 'haha', (f_x, f_y-20), cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
                # Draw a rectangle around the detected face.
                img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (255, 0, 0), 2)
                n += 1
            cv2.imshow('img', img)
            key = cv2.waitKey(30) & 0xff
            # ESC quits early.
            if key == 27:
                break
        else:
            break
    camera.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    # Ask for a name; captures are stored under face_images/<name>.
    name = input('please input your name: ')
    # NOTE(review): the original comment claimed 'face_images' must be created
    # manually first, but getfacefromcamera() calls createdir(), which uses
    # os.makedirs and creates intermediate directories — confirm before
    # relying on the manual step.
    getfacefromcamera(os.path.join('face_images', name))
4.2 对人脸图片进行处理(注:下方代码与 4.1 的采集脚本完全相同,疑为误贴;此处应为图片预处理代码,请核对)
import random
import numpy as np
import cv2
import os
IMGSIZE = 64  # side length of the square face crops


def createdir(*args):
    """Ensure that every given path exists as a directory tree."""
    for directory in args:
        # Skip paths that already exist; create the rest (with parents).
        if os.path.exists(directory):
            continue
        os.makedirs(directory)
def relight(imgsrc, alpha=1, bias=0):
    """Apply a linear brightness transform (alpha * pixel + bias) to imgsrc.

    The result is clamped to [0, 255] and returned as uint8.
    """
    scaled = imgsrc.astype(float)
    scaled = scaled * alpha + bias
    # Clamp below and above the valid uint8 range.
    scaled = np.maximum(scaled, 0)
    scaled = np.minimum(scaled, 255)
    return scaled.astype(np.uint8)
def getfacefromcamera(outdir):
    """Capture 200 face crops (IMGSIZE x IMGSIZE) from the webcam into outdir.

    NOTE(review): this function is byte-identical to the one in section 4.1 —
    the section heading says "process face images" but the code is the same
    capture script; verify the intended preprocessing code wasn't lost.
    """
    createdir(outdir)
    camera = cv2.VideoCapture(0)
    # NOTE(review): hard-coded absolute path to the Haar cascade — confirm it
    # exists on the target machine, or load via cv2.data.haarcascades instead.
    haar = cv2.CascadeClassifier('E:/QQ/1833654528/FileRecv/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml')
    n = 1
    while 1:
        # Collect 200 images of size 64*64.
        if n <= 200:
            print('It`s processing %s image.' % n)
            success, img = camera.read()
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = haar.detectMultiScale(gray_img, 1.3, 5)
            # f_x, f_y: top-left corner of the face; f_w, f_h: width/height.
            for f_x, f_y, f_w, f_h in faces:
                # Crop the face region — rows (y) first, then columns (x).
                face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                # Resize the crop to 64*64.
                face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                # Randomly vary brightness/contrast to diversify the data.
                face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
                # Save the crop.
                cv2.imwrite(os.path.join(outdir, str(n)+'.jpg'), face)
                # Draw the label 20 px above the face on the preview frame.
                cv2.putText(img, 'haha', (f_x, f_y-20), cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
                # Draw a rectangle around the detected face.
                img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (255, 0, 0), 2)
                n += 1
            cv2.imshow('img', img)
            key = cv2.waitKey(30) & 0xff
            # ESC quits early.
            if key == 27:
                break
        else:
            break
    camera.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    # Ask for a name; captures are stored under face_images/<name>.
    name = input('please input your name: ')
    # NOTE(review): the original comment claimed 'face_images' must be created
    # manually first, but getfacefromcamera() calls createdir(), which uses
    # os.makedirs and creates intermediate directories — confirm.
    getfacefromcamera(os.path.join('face_images', name))
4.3 训练模型进行识别
import tensorflow as tf
import numpy as np
# from dataset import make_anime_dataset
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets, Sequential
import glob
# from dataset import make_anime_dataset
from skimage import io, transform
import random
import csv
import time
import cv2
import os
# Suppress TensorFlow INFO/WARNING console output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Root folder holding one sub-directory of face crops per person.
root_img = 'F:\\Python 源码\\Include\\tensorflow\\人脸识别\\face_images'
# Per-channel mean/std for input normalization (these are the standard
# ImageNet statistics).
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])
# 由卷积层和全连接层组成,中间用Flatten层进行平铺, inputs: [None, 64, 64, 3]
# CNN backbone: three conv/conv/pool stages followed by a classifier head.
# Input: [None, 64, 64, 3]; output: 2 raw logits (one per known face).
my_layers = [
    # Stage 1: 64x64 -> 32x32, 32 channels.
    layers.Conv2D(32, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(32, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # Stage 2: 32x32 -> 16x16, 64 channels.
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # Stage 3: 16x16 -> 8x8, 64 channels.
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # Classifier head.
    layers.Flatten(),
    layers.Dense(512, activation=tf.nn.relu),
    layers.Dropout(rate=0.5),
    # No activation: raw logits; softmax is applied at inference time.
    layers.Dense(2, activation=None)
]
# root为我们之前获得图片数据的根目录face_images,filename为我们要加载的csv文件,
# name2label为我们获取的图片类型字典
def load_csv(root, filename, name2label):
    """Build (on first run) and read a CSV index of image paths and labels.

    root: dataset root containing one sub-folder of .jpg files per person.
    filename: name of the CSV index file stored directly under root.
    name2label: dict mapping sub-folder name -> integer class label.
    Returns (imgs, labels): parallel lists of image paths and int labels.
    """
    csv_path = os.path.join(root, filename)
    # First run: scan the per-class folders and persist a shuffled index.
    if not os.path.exists(csv_path):
        images = []
        for name in name2label.keys():
            # Collect every .jpg under root/<name>/.
            images += glob.glob(os.path.join(root, name, '*.jpg'))
        print(len(images), images)
        random.shuffle(images)
        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The class name is the folder directly containing the image:
                # '...a/b/c/x.jpg' split on os.sep -> index -2 is 'c'.
                name = img.split(os.sep)[-2]
                writer.writerow([img, name2label[name]])
        print('written into csv file:', filename)
    # Read the (now guaranteed to exist) CSV back into parallel lists.
    imgs, labels = [], []
    with open(csv_path) as f:
        for img, label in csv.reader(f):
            imgs.append(img)
            labels.append(int(label))
    return imgs, labels
def load_faceimg(root, mode='train'):
    """Load image paths and labels for one dataset split.

    root: dataset root with one sub-folder per person.
    mode: 'train' (first 60%), 'val' (next 20%), anything else = test (last 20%).
    Returns (images, labels, name2label).
    """
    # Build the name -> label dict from the sorted sub-folder names; each new
    # folder gets the next integer (first folder -> 0, second -> 1, ...).
    name2label = {}
    for name in sorted(os.listdir(os.path.join(root))):
        # Skip entries under root that are not directories.
        if not os.path.isdir(os.path.join(root, name)):
            continue
        name2label[name] = len(name2label.keys())
    # Delegate to load_csv: images is the list of file paths, labels the
    # matching list of integer classes.
    images, labels = load_csv(root, 'images.csv', name2label)
    # Slice out the requested 60/20/20 split.
    total = len(images)
    if mode == 'train':
        lo, hi = 0, int(0.6 * total)
    elif mode == 'val':
        lo, hi = int(0.6 * total), int(0.8 * total)
    else:
        lo, hi = int(0.8 * total), total
    return images[lo:hi], labels[lo:hi], name2label
def normalize(x, mean=img_mean, std=img_std):
    """Standardize x channel-wise: (x - mean) / std.

    x: image tensor scaled to [0, 1]; mean/std default to the module-level
    per-channel constants.
    """
    return (x - mean) / std
def denormalize(x, mean=img_mean, std=img_std):
    """Invert normalize(): return x * std + mean."""
    return x * std + mean
# x: list of image paths, y: list of integer class labels.
def get_tensor(x, y):
    """Read, decode, and resize every image in x; convert both lists to tensors.

    Returns (ims, y): a stacked image tensor and the label tensor.
    """
    # For each path: read the file, decode as 3-channel RGB JPEG, resize to
    # the network's 64x64 input size.
    ims = [
        tf.image.resize(tf.image.decode_jpeg(tf.io.read_file(path), channels=3), [64, 64])
        for path in x
    ]
    return tf.convert_to_tensor(ims), tf.convert_to_tensor(y)
# Preprocessing function for the tf.data pipeline; x, y are tensors.
def preprocess(x, y):
    """Augment one image tensor, scale and standardize it, and cast the label.

    NOTE(review): tf.image.random_crop with size [64, 64, 3] on get_tensor's
    already-64x64 output is an identity crop — confirm whether a pad-then-crop
    augmentation was intended.
    """
    # Data augmentation.
    x = tf.image.random_flip_left_right(x)  # horizontal mirror
    x = tf.image.random_crop(x, [64, 64, 3])  # random crop
    # x: [0, 255] -> [0, 1], as float32.
    x = tf.cast(x, dtype=tf.float32) / 255.
    # [0, 1] -> standardized values via the module-level mean/std.
    x = normalize(x)
    # Labels as int32.
    y = tf.cast(y, dtype=tf.int32)
    return x, y
# Expected prediction labels for the two known faces (also used in main());
# the zheng/sq comparison below is a small tf.equal sanity check.
xu = [0]
zheng = [1]
sq = [1]
zheng = tf.convert_to_tensor(zheng, dtype=tf.int32)
xu = tf.convert_to_tensor(xu, dtype=tf.int32)
sq = tf.convert_to_tensor(sq, dtype=tf.int32)
if tf.equal(zheng, sq):
    print('cool!')
# Load image paths and integer class labels for each split.
images_train, labels_train, name2label = load_faceimg(root_img, mode='train')
images_val, labels_val, _ = load_faceimg(root_img, mode='val')
images_test, labels_test, _ = load_faceimg(root_img, mode='test')
print('images_train:', images_train)
# Read the images from disk and convert the lists to tensors.
x_train, y_train = get_tensor(images_train, labels_train)
x_val, y_val = get_tensor(images_val, labels_val)
x_test, y_test = get_tensor(images_test, labels_test)
# Inspect their shapes.
print('x_train:', x_train.shape, 'y_train:', y_train.shape)
print('x_val:', x_val.shape, 'y_val:', y_val.shape)
print('x_test:', x_test.shape, 'y_test:', y_test.shape)
# print('images:', len(images_test))
# print('labels:', len(labels_test))
# print('name2label', name2label)
# Slice along the first dimension; shuffle (train only), preprocess, batch.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).map(preprocess).batch(10)
db_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
db_val = db_val.map(preprocess).batch(10)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess).batch(10)
# Pull one batch from each pipeline to verify the shapes.
sample_train = next(iter(db_train))
sample_val = next(iter(db_val))
sample_test = next(iter(db_test))
print('sample_train:', sample_train[0].shape, sample_train[1].shape)
print('sample_val:', sample_val[0].shape, sample_val[1].shape)
print('sample_test:', sample_test[0].shape, sample_test[1].shape)
def main():
    """Load the trained face classifier and run live webcam recognition.

    The commented-out block below is the original training loop that produced
    'model1.h5' (cross-entropy training on db_train with early stopping on
    db_test accuracy); it is kept for reference and can be re-enabled.
    """
    # my_net = Sequential(my_layers)
    #
    # my_net.build(input_shape=[None, 64, 64, 3])
    # my_net.summary()
    #
    # optimizer = optimizers.Adam(lr=1e-3)
    # acc_best = 0
    # patience_num = 10
    # no_improved_num = 0
    # for epoch in range(50):
    #     for step, (x, y) in enumerate(db_train):
    #         with tf.GradientTape() as tape:
    #             out = my_net(x)
    #             # print('out', out.shape)
    #             logits = out
    #             y_onehot = tf.one_hot(y, depth=2)
    #             loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
    #             loss = tf.reduce_mean(loss)
    #         grads = tape.gradient(loss, my_net.trainable_variables)
    #         optimizer.apply_gradients(zip(grads, my_net.trainable_variables))
    #
    #         if step % 5 == 0:
    #             print(epoch, step, 'loss:', float(loss))
    #
    #     total_num = 0
    #     total_correct = 0
    #     for x2, y2 in db_test:
    #         out = my_net(x2)
    #         logits = out
    #         prob = tf.nn.softmax(logits, axis=1)
    #         # tf.argmax(): axis=1 -> per-row argmax, axis=0 -> per-column
    #         pred = tf.argmax(prob, axis=1)
    #         # cast to int32 so it can be compared with y2 below
    #         pred = tf.cast(pred, dtype=tf.int32)
    #
    #         correct = tf.cast(tf.equal(pred, y2), dtype=tf.int32)
    #         correct = tf.reduce_sum(correct)
    #
    #         total_num += x2.shape[0]
    #         total_correct += int(correct)
    #     acc = total_correct / total_num
    #     if acc > acc_best:
    #         acc_best = acc
    #         no_improved_num = 0
    #         my_net.save('model1.h5')
    #     else:
    #         no_improved_num += 1
    #     print(epoch, 'acc:', acc, 'no_improved_num:', no_improved_num)
    #     if no_improved_num >= patience_num:
    #         break
    my_net = tf.keras.models.load_model('model1.h5')
    camera = cv2.VideoCapture(0)
    # NOTE(review): hard-coded absolute path to the Haar cascade — confirm it
    # exists, or use cv2.data.haarcascades instead.
    haar = cv2.CascadeClassifier('E:/QQ/1833654528/FileRecv/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml')
    n = 1
    while 1:
        if n <= 20000:
            print('It`s processing %s image.' % n)
            success, img = camera.read()
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = haar.detectMultiScale(gray_img, 1.3, 5)
            for f_x, f_y, f_w, f_h in faces:
                # Crop the detected face region.
                face = img[f_y:f_y + f_h, f_x:f_x + f_w]
                # Resize the crop to the network's 64x64 input size.
                face = cv2.resize(face, (64, 64))
                # Convert to a tensor; shape is [64, 64, 3].
                face_tensor = tf.convert_to_tensor(face)
                # Add a batch dimension: [64, 64, 3] -> [1, 64, 64, 3].
                face_tensor = tf.expand_dims(face_tensor, axis=0)
                # uint8 -> float32.
                # NOTE(review): unlike the training pipeline (preprocess),
                # this skips the /255 scaling and normalize() — confirm the
                # model was really trained on the same raw value range.
                face_tensor = tf.cast(face_tensor, dtype=tf.float32)
                # print('face_tensor', face_tensor)
                # Forward pass through the network.
                logits = my_net(face_tensor)
                # Softmax over each row of logits.
                prob = tf.nn.softmax(logits, axis=1)
                print('prob:', prob)
                # Index of the most probable class in each row.
                pred = tf.argmax(prob, axis=1)
                pred = tf.cast(pred, dtype=tf.int32)
                print('pred:', pred)
                # Annotate the frame with the matching person's name.
                if tf.equal(pred, zheng):
                    cv2.putText(img, 'yhd', (f_x, f_y - 20), cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
                if tf.equal(pred, xu):
                    cv2.putText(img, 'chao', (f_x, f_y - 20), cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
                img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (255, 0, 0), 2)
                n += 1
            cv2.imshow('img', img)
            key = cv2.waitKey(30) & 0xff
            # ESC exits.
            if key == 27:
                break
        else:
            break
    camera.release()
    cv2.destroyAllWindows()
    # my_net.save('my_net.h5')
    # del my_net
    # new_net = tf.keras.models.load_model('my_net.h5')
    # new_net.compile(optimizer=optimizers.Adam(lr=1e-3),
    #                 loss=tf.losses.CategoricalCrossentropy(from_logits=True),
    #                 metrics=['accuracy'])
    # new_net.fit(x=x_train, y=y_train, epochs=50)
    # loss1, acc1 = new_net.evaluate(x_test, y_test)
# Script entry point.
if __name__ == '__main__':
    main()
5.预测汽车燃油效率(Auto MPG 数据集)
# 1.导入所需模块
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print(tf.__version__)
# 2. Download the Auto MPG dataset from the UCI repository (cached locally).
dataset_path = keras.utils.get_file("auto-mpg.data",
                                    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
# 3. Load it with pandas; '?' marks missing values in this file.
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)
dataset = raw_dataset.copy()
dataset.tail()
# 4. Clean: count missing values, then drop those rows.
dataset.isna().sum()
dataset = dataset.dropna()
# One-hot encode the categorical 'Origin' column (1=USA, 2=Europe, 3=Japan).
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1) * 1.0
dataset['Europe'] = (origin == 2) * 1.0
dataset['Japan'] = (origin == 3) * 1.0
dataset.tail()
# 5. Split 80/20 into train and test sets (fixed seed for reproducibility).
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
# 6. Inspect pairwise feature relationships and per-column statistics.
sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde")
train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
# 7. Separate the target label (MPG) from the features.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
# 8. Standardize features using the *training* set statistics only.
def norm(x):
    """Standardize x with the training-set mean/std (module-level train_stats)."""
    return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
# 9. Model factory: two 64-unit hidden layers and a single regression output.
def build_model():
    """Build and compile a fresh regression model for the MPG task.

    NOTE(review): the input shape reads the module-level train_dataset, so
    this must be called after the feature columns are finalized.
    """
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)  # single continuous output (predicted MPG)
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'mse'])
    return model
model = build_model()
model.summary()
# 10. Train the model.
# Progress callback: print one dot per epoch, newline every 100 epochs.
class PrintDot(keras.callbacks.Callback):
    # Invoked by Keras at the end of every epoch.
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print('')
        print('.', end='')
EPOCHS = 1000
history = model.fit(
    normed_train_data, train_labels,
    epochs=EPOCHS, validation_split=0.2, verbose=0,
    callbacks=[PrintDot()])
# Collect the per-epoch metrics into a DataFrame for inspection.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
def plot_history(history):
    """Plot training/validation MAE and MSE curves from a Keras History."""
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    # Figure 1: mean absolute error.
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [MPG]')
    plt.plot(hist['epoch'], hist['mae'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mae'],
             label='Val Error')
    plt.ylim([0, 5])
    plt.legend()
    # Figure 2: mean squared error.
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error [$MPG^2$]')
    plt.plot(hist['epoch'], hist['mse'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mse'],
             label='Val Error')
    plt.ylim([0, 20])
    plt.legend()
    plt.show()
plot_history(history)
# Retrain a fresh model with early stopping, then evaluate and predict.
model = build_model()
# patience: epochs to wait for val_loss improvement before stopping.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split = 0.2, verbose=0, callbacks=[early_stop, PrintDot()])
plot_history(history)
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))
# Scatter plot of predicted vs. true MPG on the test set.
test_predictions = model.predict(normed_test_data).flatten()
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
# Reference line y = x (perfect predictions).
_ = plt.plot([-100, 100], [-100, 100])
# Histogram of the prediction errors.
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-B937n4DI-1616034006894)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107200254356.png)]
6.利用CIFAR10 数据集进行图像分类
# 1.导入具体的类库
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
# Load CIFAR-10 and scale pixel values into [0, 1].
(train_examples, train_labels), (test_examples, test_labels) = datasets.cifar10.load_data()
train_examples, test_examples = train_examples / 255.0, test_examples / 255.0
class_name = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Visualize the first 25 training images with their class names.
plt.figure(figsize=(10, 10))
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(train_examples[i], cmap=plt.cm.binary)
    # CIFAR-10 labels have shape (n, 1), hence the extra [0] index.
    plt.xlabel(class_name[train_labels[i][0]])
plt.show()

# Build the CNN. CONSISTENCY FIX: use the `models`/`layers` aliases that the
# file already imports (they were imported but unused) instead of repeating
# the tf.keras.* spelling on every line — identical behavior.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))  # raw logits
# Inspect the architecture.
model.summary()

# Compile and train; from_logits=True matches the linear output layer.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
history = model.fit(train_examples, train_labels, epochs=20, validation_data=(test_examples, test_labels))

# Plot training vs. validation accuracy, then report the test accuracy.
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.ylim([0.5, 1])
plt.legend(loc="lower right")
test_loss, test_acc = model.evaluate(test_examples, test_labels, verbose=2)
print(test_acc)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-4DH858Fc-1616034006896)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107190511667.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-S6jhM8so-1616034006896)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107195939485.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-IwOEURB8-1616034006897)(C:\Users\yhd\AppData\Roaming\Typora\typora-user-images\image-20210107200148885.png)]