# 代码 (code)
# 需要引用的库
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib as mpl
from random import randint
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense,Input
# Plot setup: use the KaiTi typeface for both font families so that the
# Chinese axis labels are not rendered as garbled glyphs, and set the base
# font size.
mpl.rcParams.update({
    'font.sans-serif': ['KaiTi'],
    'font.serif': ['KaiTi'],
    'font.size': 10,
})
# Path of the input CSV file.
data = r"data.csv"
# 定义一个类
class Autoencoder_DBSCAN:
    """Autoencoder-based dimensionality reduction followed by clustering.

    The constructor reads a CSV table and min-max scales it.  The methods
    then plot the raw data, train a small dense autoencoder on the scaled
    data, and cluster/visualise the samples with DBSCAN and K-Means.

    The trained models are exchanged between methods through pickle files
    in the current working directory, so ``predict`` (which trains and
    saves the models) has to run before ``dbscan`` or ``k_mean``.
    """

    # Axis captions of the 3-D scatter plots
    # (traffic volume, occupancy, mean speed).
    _AXIS_LABELS = ('交通量(veh/5min)', '占有率(%)', '平均速度(km/h)')
    # Files used to hand the trained models from one method to another.
    _AUTOENCODER_PATH = "autoencoder_model.pickle"
    _ENCODER_PATH = "encoder_model.pickle"

    def __init__(self, data) -> None:
        """Read the GBK-encoded CSV at ``data`` and min-max scale it.

        Sets ``self.data`` (the raw DataFrame), ``self.scaler`` (the fitted
        ``MinMaxScaler``) and ``self.df`` (the scaled array).
        """
        self.data = pd.read_csv(data, encoding="gbk")
        self.scaler = MinMaxScaler()
        self.df = self.scaler.fit_transform(self.data)

    @staticmethod
    def _load_pickle(path):
        """Return the object pickled at ``path``, closing the file afterwards.

        NOTE: unpickling executes arbitrary code; only load files that this
        program wrote itself.
        """
        with open(path, "rb") as handle:
            return pickle.load(handle)

    @staticmethod
    def _scatter_3d(points) -> None:
        """Show an orange 3-D scatter of the first three columns of ``points``."""
        figure = plt.figure(figsize=(10, 8))
        axes = figure.add_subplot(111, projection='3d')
        axes.scatter(points[:, 0], points[:, 1], points[:, 2], color="orange")
        x_label, y_label, z_label = Autoencoder_DBSCAN._AXIS_LABELS
        axes.set_xlabel(x_label)
        axes.set_ylabel(y_label)
        axes.set_zlabel(z_label)
        plt.show()

    # Plot the raw data: relationships between the individual features.
    def draw_initial_pic(self):
        """Show a line plot of all columns, then one scatter per column pair.

        The pair index starts at -1, so the first scatter pairs the last
        column with the first one and the remaining ones pair neighbours.
        """
        self.data.plot()
        plt.show()
        marker = ["p", "v", None]
        columns = self.data.columns
        for i in range(-1, len(columns) - 1):
            x_name, y_name = columns[i], columns[i + 1]
            plt.scatter(
                self.data[x_name],
                self.data[y_name],
                # Marker style is picked at random for each figure.
                marker=marker[randint(0, len(marker) - 1)],
                color="brown",
            )
            plt.xlabel(x_name)
            plt.ylabel(y_name)
            plt.show()

    # Plot the raw data in three dimensions.
    def init_th_dimision(self):
        """Show a 3-D scatter of the first three raw columns."""
        self._scatter_3d(self.data.values)

    # Autoencoder: compress the data and reconstruct it again.
    def autoencoder(self):
        """Train the autoencoder on ``self.df`` and pickle the models.

        Writes the full autoencoder to ``autoencoder_model.pickle`` and the
        encoder half to ``encoder_model.pickle``.  Returns ``None``.
        """
        def build_encoder(input_shape, encoding_dim):
            inputs = Input(shape=(input_shape,))                          # input layer
            encoded = Dense(32, activation="linear")(inputs)              # hidden layer
            encoded = Dense(encoding_dim, activation="linear")(encoded)   # output layer
            return tf.keras.Model(inputs, encoded)

        def build_decoder(encoding_dim, input_shape):
            inputs = Input(shape=(encoding_dim,))
            decoded = Dense(32, activation="linear")(inputs)
            decoded = Dense(input_shape, activation="linear")(decoded)
            return tf.keras.Model(inputs, decoded)

        encoding_dim = 2
        # Take the input width from the scaled data instead of hard-coding 3,
        # so the network always matches the table that was loaded.
        input_shape = self.df.shape[1]
        encoder = build_encoder(input_shape, encoding_dim)
        decoder = build_decoder(encoding_dim, input_shape)
        autoencoder = tf.keras.Sequential([encoder, decoder])
        autoencoder.compile(optimizer='adam', loss='mse')
        autoencoder.fit(self.df, self.df, epochs=10, batch_size=16)
        # Serialise the models so the other methods can reload them.
        # NOTE(review): this relies on Keras models being picklable in the
        # installed TensorFlow version; Keras' own save/load API is the
        # documented way to persist models -- confirm before changing.
        with open(self._AUTOENCODER_PATH, "wb") as t:
            pickle.dump(autoencoder, t)
        with open(self._ENCODER_PATH, "wb") as f:
            pickle.dump(encoder, f)

    def predict(self):
        """Train, reconstruct the data, print per-column MSE and plot it.

        The reconstruction is mapped back to the original scale with the
        fitted scaler before it is compared with the raw values.
        """
        # Train and save the models, then reload the autoencoder from disk.
        self.autoencoder()
        autoencoder = self._load_pickle(self._AUTOENCODER_PATH)
        predition = autoencoder.predict(self.df)
        result = self.scaler.inverse_transform(predition)
        # Mean squared error of each reconstructed column.
        for i in range(result.shape[1]):
            print(mean_squared_error(self.data.values[:, i], result[:, i]))
        self._scatter_3d(result)

    # DBSCAN clustering.
    def dbscan(self, eps: float = 0.03, min_samples: int = 5):
        """Cluster the encoded data with DBSCAN and show a coloured scatter.

        Args:
            eps: DBSCAN neighbourhood radius (default matches the original
                hard-coded value).
            min_samples: DBSCAN core-point threshold (default matches the
                original hard-coded value).

        Requires ``encoder_model.pickle`` to exist already.
        """
        encoder = self._load_pickle(self._ENCODER_PATH)
        encoder_data = encoder.predict(self.df)
        db = DBSCAN(eps=eps, min_samples=min_samples).fit(encoder_data)
        labels = db.labels_
        plt.scatter(encoder_data[:, 0], encoder_data[:, 1], c=labels, cmap='coolwarm')
        # With the defaults this renders as "eps=0.03, min_samples=5".
        plt.xlabel(f"eps={eps}, min_samples={min_samples}")
        plt.ylabel("Feature 2")
        plt.title("DBSCAN Clustering")
        plt.show()

    # K-Means clustering.
    def k_mean(self):
        """Cluster with K-Means (10 clusters) and show a coloured scatter.

        Requires ``encoder_model.pickle`` to exist already.
        """
        encoder = self._load_pickle(self._ENCODER_PATH)
        encoder_data = encoder.predict(self.df)
        clf = KMeans(n_clusters=10, init="random", n_init=10, random_state=45)
        # NOTE(review): the clusters are fitted on the scaled input features,
        # while the plot uses the 2-D encoded coordinates -- confirm that
        # this is intended.
        clf.fit(self.df)
        result = clf.predict(self.df)
        plt.scatter(encoder_data[:, 0], encoder_data[:, 1], c=result, cmap='coolwarm')
        plt.show()
if __name__ == "__main__":
    # Run the whole pipeline on the configured CSV file.  The order matters:
    # predict() trains and saves the models that dbscan() and k_mean() load.
    pipeline = Autoencoder_DBSCAN(data)
    for step in (
        pipeline.draw_initial_pic,
        pipeline.init_th_dimision,
        pipeline.predict,
        pipeline.dbscan,
        pipeline.k_mean,
    ):
        step()
# 结果图 (result figures)