自编码器_【手写数字】

最新推荐文章于 2023-07-23 00:55:46 发布

佐倉

最新推荐文章于 2023-07-23 00:55:46 发布

阅读量2.1k

点赞数 1

文章标签：自编码器手写数字识别潜在空间图像转换模型训练

本文链接：https://blog.csdn.net/qq_38641985/article/details/121350846

版权

自编码器

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model 
import matplotlib.pyplot as plt
import numpy as np


# Progress marker printed before the training script does any work.
print("start")

def train_model():
    """Train a dense autoencoder on flattened MNIST digits and plot latent codes.

    Steps:
      1. Load MNIST through ``tf.keras.datasets.mnist``.
      2. Scale pixels to [-0.5, 0.5] and flatten every image to one row.
      3. Build an encoder (128 -> 64 -> 10 -> ``latent_dim``) and a mirrored
         decoder ending in a ``tanh`` layer as wide as the input.
      4. Train for 5 epochs with MSE loss and save to ``autoencoder.h5``.
      5. Encode the test set and scatter-plot its first two latent
         dimensions, coloured by digit label.

    Returns:
        None. Side effects: writes ``autoencoder.h5``, prints shapes and
        opens a matplotlib window.
    """
    mnist = tf.keras.datasets.mnist

    # Training and test splits (images plus labels).
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Convert to float and shift so that pixel values lie in [-0.5, 0.5].
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_test = x_test.astype('float32') / 255.0 - 0.5

    # Flatten each image into a single feature vector.
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)

    # Width of one flattened image (784 for 28x28 MNIST digits).
    input_dim = x_train.shape[1]
    # The original declared 16 here but passed 10 to fit(); the value that
    # was actually in effect is kept and now flows through this variable.
    batch_size = 10
    # Dimensionality of the latent representation.
    latent_dim = 128

    input_img = tf.keras.Input(shape=(input_dim,))

    encoded = Dense(128, activation="relu")(input_img)
    encoded = Dense(64, activation="relu")(encoded)
    encoded = Dense(10, activation="relu")(encoded)
    encoder_output = Dense(latent_dim)(encoded)

    dencoded = Dense(10, activation="relu")(encoder_output)
    dencoded = Dense(64, activation="relu")(dencoded)
    dencoded = Dense(128, activation="relu")(dencoded)
    dencoded = Dense(input_dim, activation="tanh")(dencoded)

    # Both models share the encoder layers, so fitting the autoencoder
    # also updates the weights used by ``encoder``.
    autoencoder = Model(input_img, dencoded)
    encoder = Model(input_img, encoder_output)

    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5)
    autoencoder.compile(optimizer=adam_optimizer, loss="mse")
    autoencoder.fit(
        x_train, x_train, epochs=5, batch_size=batch_size, shuffle=True
    )

    autoencoder.save("autoencoder.h5")

    # Encode only after training; the earlier pre-training predict() call
    # produced a result that was immediately overwritten and was removed.
    encoded_imgs = encoder.predict(x_test)
    print(encoded_imgs.shape)
    plt.scatter(encoded_imgs[:, 0], encoded_imgs[:, 1], c=y_test)
    plt.show()
    
# Run the training pipeline, then print a completion marker.
train_model()
print("end")

预测

#coding=utf-8

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model,load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Progress marker printed before the prediction script does any work.
print("start")
def cv2_display(src):
    """Show ``src`` in an OpenCV window named 'src' until a key is pressed."""
    window_name = 'src'
    cv2.imshow(window_name, src)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def predict_model():
    """Reconstruct the first MNIST test digit with the saved autoencoder.

    Writes the raw first test image to ``test.png``, runs the first ten
    test images through the model loaded from ``autoencoder.h5`` and writes
    the reconstruction of the first one to ``test_output.png``.

    Returns:
        None. Side effects: writes two PNG files and prints array shapes.
    """
    mnist = tf.keras.datasets.mnist
    # Training and test splits (images plus labels).
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Only the first ten test images are needed.
    x_test = x_test[:10]
    cv2.imwrite("test.png", x_test[0])

    # Same preprocessing as at training time: scale to [-0.5, 0.5], flatten.
    x_test = x_test.astype('float32') / 255.0 - 0.5
    x_test = x_test.reshape(x_test.shape[0], -1)

    # The training images are never fed to the model here; they are only
    # flattened so the shape printout stays the same, skipping the float
    # conversion of the whole training set.
    x_train = x_train.reshape(x_train.shape[0], -1)
    print(x_train.shape, x_test.shape)

    autoencoder = load_model("autoencoder.h5")
    moto_img = autoencoder.predict(x_test)
    print(moto_img.shape)

    # Undo the preprocessing: back to a 28x28 image on the 0..255 scale.
    moto_src = (np.asarray(moto_img[0]).reshape(28, 28) + 0.5) * 255.0
    # The tanh output layer can produce values outside [-0.5, 0.5], so the
    # rescaled pixels may leave [0, 255]. Round, clip and cast explicitly
    # instead of handing a float array to cv2.imwrite.
    moto_src = np.clip(np.rint(moto_src), 0, 255).astype(np.uint8)
    cv2.imwrite("test_output.png", moto_src)

    
# Run the reconstruction demo, then print a completion marker.
predict_model()
print("end")

原始图片
请添加图片描述
预测图片（自编码器预测输出的图片）

自己利用数据训练编码器解码器

编码器

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model,load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Progress marker printed before the encoder script does any work.
print("start")

def cv2_display(src):
    """Show ``src`` in an OpenCV window named 'src' until a key is pressed."""
    window_name = 'src'
    cv2.imshow(window_name, src)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def train_model():
    """Build the encoder, save it and dump latent codes for MNIST.

    Despite its name this function never calls ``fit``: the encoder is
    saved and used with its freshly initialised weights. It maps each
    flattened image to a ``latent_dim``-dimensional vector.

    Returns:
        None. Side effects: writes ``transform_128_encoder.h5``,
        ``Y_train.npy`` and ``Y_test.npy`` and prints array shapes.
    """
    mnist = tf.keras.datasets.mnist

    # Training and test splits (images plus labels).
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Convert to float and shift so that pixel values lie in [-0.5, 0.5].
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_test = x_test.astype('float32') / 255.0 - 0.5

    # Flatten each image into a single feature vector.
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)

    # Dimensionality of the latent representation.
    latent_dim = 128

    input_img_1 = tf.keras.Input(shape=(784,))

    encoded = Dense(128, activation="relu")(input_img_1)
    encoded = Dense(64, activation="relu")(encoded)
    encoded = Dense(10, activation="relu")(encoded)
    encoder_output = Dense(latent_dim)(encoded)

    # The original also built a decoder graph here that was never attached
    # to any model; that dead code has been removed.
    encoder = Model(input_img_1, encoder_output)
    encoder.save("transform_128_encoder.h5")

    # Latent codes consumed later by the decoder training script.
    Y_train = encoder.predict(x_train)
    Y_test = encoder.predict(x_test)

    np.save("Y_train.npy", Y_train)
    np.save("Y_test.npy", Y_test)


# Generate and store the latent codes, then print a completion marker.
train_model()
print("end")

说明：编码器可以将 28×28 的手写数字图像转换为 128 维向量，维度可以自定义。注意：这里的编码器没有经过训练，直接使用随机初始化的权重进行转换。

解码器

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model,load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Progress marker printed before the decoder script does any work.
print("start")

def cv2_display(src):
    """Show ``src`` in an OpenCV window named 'src' until a key is pressed."""
    window_name = 'src'
    cv2.imshow(window_name, src)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def train_model():
    """Train a decoder that maps stored 128-d codes back to MNIST images.

    Inputs are the latent vectors loaded from ``Y_train.npy``; targets are
    the flattened MNIST training images scaled to [-0.5, 0.5]. The model is
    trained for 100 epochs with MSE loss and saved to
    ``transform_784_encoder.h5`` (the file name is kept as-is because the
    prediction script loads it by that name).

    Returns:
        None. Side effects: writes the model file and prints array shapes.
    """
    mnist = tf.keras.datasets.mnist

    # Training and test splits (images plus labels).
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Targets: float pixels in [-0.5, 0.5], one flattened row per image.
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_train = x_train.reshape(x_train.shape[0], -1)

    # The test images are not used for training; they are only flattened
    # so the shape printout stays the same.
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)

    # The original declared 16 here but passed 60 to fit(); the value that
    # was actually in effect is kept and now flows through this variable.
    batch_size = 60
    # Dimensionality of the latent vectors fed to the decoder.
    latent_dim = 128

    input_img_2 = tf.keras.Input(shape=(latent_dim,))

    # The original also built an encoder graph here that was never attached
    # to any model; that dead code has been removed.
    dencoded = Dense(10, activation="relu")(input_img_2)
    dencoded = Dense(64, activation="relu")(dencoded)
    dencoded = Dense(128, activation="relu")(dencoded)
    dencoded = Dense(784, activation="tanh")(dencoded)

    dencoder = Model(input_img_2, dencoded)
    Y = np.load("Y_train.npy")
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    dencoder.compile(optimizer=adam_optimizer, loss="mse")
    dencoder.fit(Y, x_train, epochs=100, batch_size=batch_size, shuffle=True)
    dencoder.save("transform_784_encoder.h5")
# Train and save the decoder, then print a completion marker.
train_model()
print("end")

说明：解码器将 128 维的向量还原为手写数字图像；解码器需要训练，相当于编码的逆操作。

预测还原数据

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model,load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Progress marker printed before the restoration script does any work.
print("start")

def cv2_display(src):
    """Show ``src`` in an OpenCV window named 'src' until a key is pressed."""
    window_name = 'src'
    cv2.imshow(window_name, src)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def predict_model():
    """Decode the stored test-set codes and save the first reconstruction.

    Loads the latent vectors from ``Y_test.npy``, runs them through the
    decoder loaded from ``transform_784_encoder.h5`` and writes the first
    reconstructed digit to ``1_output.png``.

    Returns:
        None. Side effects: writes one PNG file and prints array shapes.
    """
    Y = np.load("Y_test.npy")
    print(Y.shape)

    dencoder = load_model("transform_784_encoder.h5")
    encoded_imgs = dencoder.predict(Y)
    print(encoded_imgs.shape)

    # Undo the training-time preprocessing: 28x28 image on the 0..255 scale.
    predict_src = (np.asarray(encoded_imgs[0]).reshape(28, 28) + 0.5) * 255.0
    # The decoder's tanh output can leave [-0.5, 0.5], so the rescaled
    # pixels may leave [0, 255]. Round, clip and cast explicitly instead
    # of handing a float array to cv2.imwrite.
    predict_src = np.clip(np.rint(predict_src), 0, 255).astype(np.uint8)
    cv2.imwrite("1_output.png", predict_src)
    
# Decode the stored test codes, then print a completion marker.
predict_model()
print("end")

请添加图片描述
说明：可以看出，还原后的图片与原始图片稍有不同，缺少了一些细节，清晰度也有所下降。

结尾

也可以将该方案迁移到彩色图片上；虽然能够还原轮廓，但细节部分相差太大，需要使用其他网络才能达到更好的效果。
下面是利用该方案得到的彩色图片输出效果。
彩色输入图片
请添加图片描述
彩色输出图片

找到更好的方案后，会在下一篇文章中更新。

佐倉

关注

1
点赞
踩
4

收藏

觉得还不错? 一键收藏
打赏
0
评论
自编码器_【手写数字】

自编码器import tensorflow as tffrom tensorflow.keras.layers import Densefrom tensorflow.keras.models import Model import matplotlib.pyplot as pltimport numpy as npprint ("start")def train_model(): mnist=tf.keras.datasets.mnist #获取数据，训练集，测试集
复制链接

扫一扫