【机器学习】基于自编码器升降维、聚类

代码

# 需要引用的库

import pickle
import numpy as np 
import pandas as pd
import tensorflow as tf
import matplotlib as mpl
from random import randint
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense,Input


# Plot setup: select the KaiTi font so Chinese labels are not rendered as
# garbled characters, and set the default font size.
mpl.rcParams.update({
    'font.sans-serif': ['KaiTi'],
    'font.serif': ['KaiTi'],
})
plt.rcParams.update({'font.size': 10})

# Path of the CSV file passed to Autoencoder_DBSCAN in the __main__ section.
data = r"data.csv"


# 定义一个类
class Autoencoder_DBSCAN:
    """Autoencoder-based dimensionality reduction followed by clustering.

    The CSV passed to the constructor is min-max scaled, an autoencoder with a
    2-unit bottleneck is trained on it, and the encoded points are then
    visualised with DBSCAN and KMeans cluster labels. The trained models are
    exchanged between methods through pickle files in the working directory.

    The 3-D plots use the first three columns of the data; their axis labels
    name them as traffic volume, occupancy and average speed.
    """

    # File names used to hand the trained models from ``autoencoder`` to
    # ``predict``, ``dbscan`` and ``k_mean``.
    _AUTOENCODER_PATH = "autoencoder_model.pickle"
    _ENCODER_PATH = "encoder_model.pickle"

    def __init__(self,data) -> None:
        """Read the CSV and fit a min-max scaler on it.

        Args:
            data: Anything ``pandas.read_csv`` accepts (here a path to a
                GBK-encoded CSV file).
        """
        self.data = pd.read_csv(data, encoding="gbk")
        self.scaler = MinMaxScaler()
        # Scaled copy of the data (array returned by ``fit_transform``); this
        # is what the models are trained on and evaluated with.
        self.df = self.scaler.fit_transform(self.data)

    @staticmethod
    def _load_pickle(path):
        """Return the object unpickled from *path*, closing the file afterwards."""
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # files written by ``autoencoder`` itself, never untrusted ones.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    @staticmethod
    def _scatter_3d(points):
        """Show a 3-D scatter plot of the first three columns of *points*."""
        figure = plt.figure(figsize=(10, 8))
        axes = figure.add_subplot(111, projection='3d')
        axes.scatter(points[:, 0], points[:, 1], points[:, 2], color="orange")
        axes.set_xlabel('交通量(veh/5min)')
        axes.set_ylabel('占有率(%)')
        axes.set_zlabel('平均速度(km/h)')
        plt.show()

    def draw_initial_pic(self):
        """Plot the raw data and the pairwise relation of adjacent columns.

        First shows the DataFrame's default plot, then one scatter plot per
        pair of neighbouring columns, each with a randomly chosen marker.
        """
        self.data.plot()
        plt.show()

        marker = ["p", "v", None]
        columns = self.data.columns

        # Starting at -1 pairs the last column with the first one, so every
        # column appears once on the x axis and once on the y axis.
        for i in range(-1, len(columns) - 1):
            x_name, y_name = columns[i], columns[i + 1]
            plt.scatter(
                self.data[x_name],
                self.data[y_name],
                marker=marker[randint(0, len(marker) - 1)],
                color="brown",
            )
            plt.xlabel(x_name)
            plt.ylabel(y_name)
            plt.show()

    def init_th_dimision(self):
        """Show a 3-D scatter plot of the first three columns of the raw data."""
        self._scatter_3d(self.data.values)

    def autoencoder(self):
        """Train the autoencoder on the scaled data and pickle the models.

        Builds an encoder (input -> 32 -> 2) and a mirrored decoder
        (2 -> 32 -> input), all with linear activations, trains them end to
        end with Adam on MSE for 10 epochs (batch size 16), then writes the
        full autoencoder and the encoder alone to pickle files.
        """

        def build_encoder(input_shape, encoding_dim):
            """Return a model mapping the input features to the bottleneck."""
            inputs = Input(shape=(input_shape,))
            hidden = Dense(32, activation="linear")(inputs)
            encoded = Dense(encoding_dim, activation="linear")(hidden)
            return tf.keras.Model(inputs, encoded)

        def build_decoder(encoding_dim, input_shape):
            """Return a model mapping the bottleneck back to the input features."""
            inputs = Input(shape=(encoding_dim,))
            hidden = Dense(32, activation="linear")(inputs)
            decoded = Dense(input_shape, activation="linear")(hidden)
            return tf.keras.Model(inputs, decoded)

        encoding_dim = 2
        # Derive the feature count from the training data instead of
        # hard-coding 3, so the network always matches what it is fitted on.
        input_shape = self.df.shape[1]

        encoder = build_encoder(input_shape, encoding_dim)
        decoder = build_decoder(encoding_dim, input_shape)

        autoencoder = tf.keras.Sequential([encoder, decoder])
        autoencoder.compile(optimizer='adam', loss='mse')
        autoencoder.fit(self.df, self.df, epochs=10, batch_size=16)

        # NOTE(review): persisting Keras models via pickle depends on the
        # installed Keras supporting it; Keras' own saving API may be the
        # more robust choice - confirm before relying on these files.
        with open(self._AUTOENCODER_PATH, "wb") as t:
            pickle.dump(autoencoder, t)
        with open(self._ENCODER_PATH, "wb") as f:
            pickle.dump(encoder, f)

    def predict(self):
        """Train, reconstruct the data and report the reconstruction error.

        Retrains the autoencoder, reloads it from its pickle file, maps the
        reconstruction back to the original scale, prints the mean squared
        error of every column and shows the reconstruction as a 3-D scatter
        plot.
        """
        self.autoencoder()

        autoencoder = self._load_pickle(self._AUTOENCODER_PATH)

        predition = autoencoder.predict(self.df)
        result = self.scaler.inverse_transform(predition)

        # Per-column MSE between the raw data and its reconstruction.
        for i in range(result.shape[1]):
            print(mean_squared_error(self.data.values[:, i], result[:, i]))

        self._scatter_3d(result)

    def dbscan(self):
        """Cluster the encoded data with DBSCAN and plot the labelled points.

        Requires the encoder pickle written by ``autoencoder``.
        """
        encoder = self._load_pickle(self._ENCODER_PATH)
        encoder_data = encoder.predict(self.df)

        db = DBSCAN(eps=0.03, min_samples=5).fit(encoder_data)
        labels = db.labels_
        plt.scatter(encoder_data[:, 0], encoder_data[:, 1], c=labels, cmap='coolwarm')
        plt.xlabel("eps=0.03, min_samples=5")
        plt.ylabel("Feature 2")
        plt.title("DBSCAN Clustering")
        plt.show()

    def k_mean(self):
        """Cluster with KMeans (10 clusters) and plot the encoded points.

        Requires the encoder pickle written by ``autoencoder``.
        """
        encoder = self._load_pickle(self._ENCODER_PATH)
        encoder_data = encoder.predict(self.df)

        clf = KMeans(n_clusters=10, init="random", n_init=10, random_state=45)
        # NOTE(review): KMeans is fitted on the scaled full-dimensional data,
        # not on encoder_data (unlike dbscan); the encoded points are only
        # used as plot coordinates. Confirm this is intended.
        clf.fit(self.df)
        result = clf.predict(self.df)
        plt.scatter(encoder_data[:, 0], encoder_data[:, 1], c=result, cmap='coolwarm')
        plt.show()


if __name__ == "__main__":

    # Run the whole pipeline in order: raw-data plots, autoencoder training
    # with reconstruction check, then both clustering visualisations.
    start = Autoencoder_DBSCAN(data)
    for step in (
        start.draw_initial_pic,
        start.init_th_dimision,
        start.predict,
        start.dbscan,
        start.k_mean,
    ):
        step()

结果图

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值