项目实训@线性回归实现

最新推荐文章于 2024-05-27 17:00:50 发布
m0_54588463
最新推荐文章于 2024-05-27 17:00:50 发布
阅读量146
点赞数
文章标签：线性回归 python 数据挖掘
本文链接：https://blog.csdn.net/m0_54588463/article/details/125233751
版权
在本部分将展示线性回归预测的具体实现
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import getdata
from scipy.stats import pearsonr
import warnings
import math
from scipy.stats import pearsonr
from scipy.stats import pearsonr
def data_processing(a,b):

    plt.style.use('seaborn')

    data2 = getdata(a,b);
    print(data2.head(3))

    #cel(excel_name, sheet_name=2, na_values=na_values)
    # data3.head(3)

    # # 数据清洗

    # ## 缺失行补齐算法(不补齐会造成很严重的后果)

    temp_df = data2.copy()
    lack_list = list(set(time_list)-set(data2['last_update']))
    lack_df = pd.DataFrame(lack_list,columns=['last_update'])
    temp_df = pd.concat([temp_df,lack_df])
    temp_df.sort_values("last_update",inplace=True)
    temp_df.index = list(range(len(temp_df)))
    temp_df.shape

    data2 = temp_df
    data2.describe().to_csv('describe1.csv')
    data2.describe()

    data2.info()
    data2.isnull().sum().to_csv('info.csv')
    data2.isnull().sum()


    # ### 线性插值

    data2.iloc[:,2:] = data2.iloc[:,2:].interpolate()
    data2.isnull().sum()
 # ## 异常值

    # ### 消除负值，不低于边界点0

    data2 = data2.applymap(lambda v: max(0,v) if isinstance(v, float) else v)
    data2.describe().to_csv('describe2.csv')
    data2.describe()
 # # 特征展示

    # ## 分布图
    warnings.filterwarnings("ignore")
    def fkdeplot(vec, name):
        sns.kdeplot(vec)
        plt.title(name, fontsize=12)
        plt.show()

    # for i in range(10):
    #     fkdeplot(data2.iloc[:,i+2], 'Data')
    fkdeplot(data2.iloc[:,4].values, 'SO2')
    fkdeplot(data2.iloc[:,5].values, 'NO2')
    fkdeplot(data2.iloc[:,3].values, 'PM10')
    fkdeplot(data2.iloc[:,2].values, 'Pm2.5')
    fkdeplot(data2.iloc[:,7].values, 'O3')
    fkdeplot(data2.iloc[:,6].values, 'CO')
    fkdeplot(data2.iloc[:,10].values, 'Temperature')
    fkdeplot(data2.iloc[:,13].values, 'Humidity')
    fkdeplot(data2.iloc[:,12].values, 'Pressure')
    fkdeplot(data2.iloc[:,15].values, 'Wind Speed')

    # ## AQI分布图
    def compute_AQI(a):
        print(a)
        #a=np.array([n_SO2,n_NO2,n_PM10,n_PM25,n_O3,n_CO])
        T=np.array([[0,50,150,475,800,1600,2100,2620],[0,40,80,180,280,565,750,940],[0,50,150,250,350,420,500,600],[0,35,75,115,150,250,350,500],[0,100,160,215,265,800,0,0],[0,2,4,14,24,36,48,60]])
        I=np.array([0,50,100,150,200,300,400,500])
        BP_LO = []
        BP_Hi=[]
        IAQI_LO=[]
        IAQI_Hi=[]
        IAQI=np.zeros((1,6))
        result=[]
        for j in range(6):

            for i in range(1,8):
                if a[j] <= T[j, i]:
                    print(a[i])
                    BP_LO.append(T[j, i-1])
                    BP_Hi.append(T[j, i])
                    IAQI_LO.append(I[i-1])
                    IAQI_Hi.append(I[i])
                    break
                else:
                    continue

            IAQI[0, j] = (IAQI_Hi[j] - IAQI_LO[j]) / (BP_Hi[j] - BP_LO[j]) * (a[j] - BP_LO[j]) + IAQI_LO[j]
            result.append(math.ceil(IAQI[0, j]))
        AQI=np.max(result)
        c=np.argmax(result)
        name=["SO2", "NO2", "PM10", "PM25", "O3", "CO"]
    #     if AQI<=50:
    #         print("AQI为%d" %(AQI))
    #         print("当天无首要污染物")
    #     else:
    #         print("AQI为%d" % (AQI))
    #         print('首要污染物为%s' % (name[c]))
        return AQI

    for inst in data2.iloc[:,2:8].values:
        print('!!!')
        print(inst)
    AQI = [compute_AQI(inst) for inst in data2.iloc[:,2:8].values]
    data2['AQI'] = AQI
    fkdeplot(data2.iloc[:, 1].values, 'AQI')
    def fplot(vec, name):
        plt.plot(vec)
        plt.title(name)
        plt.show()
    show_num = 24
    fplot(data2.iloc[:show_num,4], 'SO2')
    fplot(data2.iloc[:show_num,5], 'NO2')
    fplot(data2.iloc[:show_num,3], 'PM10')
    fplot(data2.iloc[:show_num,2], 'Pm2.5')
    fplot(data2.iloc[:show_num,7], 'O3')
    fplot(data2.iloc[:show_num,6], 'CO')
    fplot(data2.iloc[:show_num,10], 'Temperature')
    fplot(data2.iloc[:show_num,13], 'Humidity')
    fplot(data2.iloc[:show_num,12], 'Pressure')
    fplot(data2.iloc[:show_num,15], 'Wind Speed')
    # ## 展示AQI的前6个时刻
    fplot(data2.iloc[:show_num,-1], 'AQI')
    # # 相关性分析(AQI与气象条件）
    # pearson相关性

    corr = data2.loc[:, ['temperature', 'humidity', 'pressure', 'wind_speed', 'aqi']].corr()



    corr.columns = ['temperature', 'pressure', 'humidity', 'wind_speed', 'aqi']
    corr.index = corr.columns
    sns.heatmap(corr, annot=True)

    AQI = data2.loc[:, 'aqi']
    # # 相关性分析(AQI与高级气象条件）

    # ## k小时变压

    warm_up = 20
    high_level_data = []

    # In[310]:

    k = 2

    def stat_k(pressure_k):
        # temp = pressure_k[-1]-pressure_k[0]
        # temp = np.var(pressure_k)
        temp = np.mean(pressure_k)
        # np.std(pressure_k)/np.mean(pressure_k)
        return temp

    pressure_k = []
    temp_data = data2.iloc[:, 12].values
    print(temp_data)
    print('total number', len(temp_data))
    for i in range(len(temp_data) - k + 1):
        pressure_k.append(temp_data[i:i + k])
    pressure_k = np.array(pressure_k)

    print('data.shape', pressure_k.shape)
    print('total number', len(pressure_k))
    pressure_k = [stat_k(inst) for inst in pressure_k]
    fkdeplot(pressure_k, 'pressure-%d' % k)
    fplot(pressure_k[:show_num], 'pressure-%d' % k)
    high_level_data.append(pressure_k[warm_up - k:])

    # In[311]:

    from scipy.stats import pearsonr

    def pressure_k_peasonr(k):
        pressure_k = []
        temp_data = data2.iloc[:, 12].values
        # print('total number', len(temp_data))
        for i in range(len(temp_data) - k + 1):
            pressure_k.append(temp_data[i:i + k])
        pressure_k = np.array(pressure_k)
        # print('data.shape', pressure_k.shape)
        # print('total number', len(pressure_k))
        pressure_k = [stat_k(inst) for inst in pressure_k]
        # print(len(AQI[k-1:]), len(pressure_k))
        temp_pearsonr = pearsonr(AQI[k - 1:], pressure_k)[0]
        # print(temp_pearsonr)
        return temp_pearsonr

    k_list = []
    pearsonr_list = []
    for k in range(10):
        temp_k = k + 5
        temp_pearsonr = pressure_k_peasonr(temp_k)
        k_list.append(temp_k)
        pearsonr_list.append(temp_pearsonr)

    plt.plot(k_list, pearsonr_list)
    plt.xlabel('K')
    plt.ylabel('pearsonr correlation')
    plt.title('pressure-k')
    plt.show()

    idx = np.argmax(np.abs(pearsonr_list))
    print('最大(绝对)相关性', pearsonr_list[idx])
    print('最大相关性对应k', k_list[idx])

    # ## k小时变温

    k = 2

    def stat_k(pressure_k):
        temp = pressure_k[-1] - pressure_k[0]
        # temp = np.var(pressure_k)
        # temp = np.mean(pressure_k)
        # np.std(pressure_k)/np.mean(pressure_k)
        return temp

    pressure_k = []
    temp_data = data2.iloc[:, 10].values
    print('total number', len(temp_data))
    for i in range(len(temp_data) - k + 1):
        pressure_k.append(temp_data[i:i + k])
    pressure_k = np.array(pressure_k)
    print('data.shape', pressure_k.shape)
    print('total number', len(pressure_k))
    pressure_k = [stat_k(inst) for inst in pressure_k]
    fplot(pressure_k[:show_num], 'temperatyre-%d' % k)
    fkdeplot(pressure_k, 'temperatyre-%d' % k)
    high_level_data.append(pressure_k[warm_up - k:])

    from scipy.stats import pearsonr

    def pressure_k_peasonr(k):
        pressure_k = []
        temp_data = data2.iloc[:, 10].values
        # print('total number', len(temp_data))
        for i in range(len(temp_data) - k + 1):
            pressure_k.append(temp_data[i:i + k])
        pressure_k = np.array(pressure_k)
        # print('data.shape', pressure_k.shape)
        # print('total number', len(pressure_k))
        pressure_k = [stat_k(inst) for inst in pressure_k]
        # print(len(AQI[k-1:]), len(pressure_k))
        temp_pearsonr = pearsonr(AQI[k - 1:], pressure_k)[0]
        # print(temp_pearsonr)
        return temp_pearsonr

    k_list = []
    pearsonr_list = []
    for k in range(10):
        temp_k = k + 2
        temp_pearsonr = pressure_k_peasonr(temp_k)
        k_list.append(temp_k)
        pearsonr_list.append(temp_pearsonr)

    plt.plot(k_list, pearsonr_list)
    plt.xlabel('K')
    plt.ylabel('pearsonr correlation')
    plt.title('temperature-k')
    plt.show()

    idx = np.argmax(np.abs(pearsonr_list))
    print('最大(绝对)相关性', pearsonr_list[idx])
    print('最大相关性对应k', k_list[idx])

    # ## k小时平均风速

    # In[314]:

    k = 3

    def stat_k(pressure_k):
        temp = np.mean(pressure_k)
        return temp

    pressure_k = []
    temp_data = data2.iloc[:, 15].values
    print('total number', len(temp_data))
    for i in range(len(temp_data) - k + 1):
        pressure_k.append(temp_data[i:i + k])
    pressure_k = np.array(pressure_k)
    print('data.shape', pressure_k.shape)
    print('total number', len(pressure_k))
    pressure_k = [stat_k(inst) for inst in pressure_k]
    fkdeplot(pressure_k, 'pressure-%d' % k)
    fplot(pressure_k[:show_num], 'pressure-%d' % k)
    high_level_data.append(pressure_k[warm_up - k:])

    # In[315]:



    def pressure_k_peasonr(k):
        pressure_k = []
        temp_data = data2.iloc[:, 15].values
        # print('total number', len(temp_data))
        for i in range(len(temp_data) - k + 1):
            pressure_k.append(temp_data[i:i + k])
        pressure_k = np.array(pressure_k)
        # print('data.shape', pressure_k.shape)
        # print('total number', len(pressure_k))
        pressure_k = [stat_k(inst) for inst in pressure_k]
        # print(len(AQI[k-1:]), len(pressure_k))
        temp_pearsonr = pearsonr(AQI[k - 1:], pressure_k)[0]
        # print(temp_pearsonr)
        return temp_pearsonr

    k_list = []
    pearsonr_list = []
    for k in range(10):
        temp_k = k + 2
        temp_pearsonr = pressure_k_peasonr(temp_k)
        k_list.append(temp_k)
        pearsonr_list.append(temp_pearsonr)

    plt.plot(k_list, pearsonr_list)
    plt.xlabel('K')
    plt.ylabel('pearsonr correlation')
    plt.title('wind-speed-k')
    plt.show()

    idx = np.argmax(np.abs(pearsonr_list))
    print('最大(绝对)相关性', pearsonr_list[idx])
    print('最大相关性对应k', k_list[idx])

    # ## k小时平均湿度

    # In[316]:

    k = 2

    def stat_k(pressure_k):
        temp = np.mean(pressure_k)
        return temp

    pressure_k = []
    temp_data = data2.iloc[:, 13].values
    print(1212121211)
    print(temp_data)
    print('total number', len(temp_data))
    for i in range(len(temp_data) - k + 1):
        pressure_k.append(temp_data[i:i + k])
    pressure_k = np.array(pressure_k)
    print('data.shape', pressure_k.shape)
    print('total number', len(pressure_k))
    pressure_k = [stat_k(inst) for inst in pressure_k]
    fkdeplot(pressure_k, 'pressure-%d' % k)
    fplot(pressure_k[:show_num], 'pressure-%d' % k)

    high_level_data.append(pressure_k[warm_up - k:])

    def pressure_k_peasonr(k):
        pressure_k = []
        temp_data = data2.iloc[:, 13].values
        # print('total number', len(temp_data))
        for i in range(len(temp_data) - k + 1):
            pressure_k.append(temp_data[i:i + k])
        pressure_k = np.array(pressure_k)
        # print('data.shape', pressure_k.shape)
        # print('total number', len(pressure_k))
        pressure_k = [stat_k(inst) for inst in pressure_k]
        # print(len(AQI[k-1:]), len(pressure_k))
        temp_pearsonr = pearsonr(AQI[k - 1:], pressure_k)[0]
        # print(temp_pearsonr)
        return temp_pearsonr

    k_list = []
    pearsonr_list = []
    for k in range(10):
        temp_k = k + 2
        temp_pearsonr = pressure_k_peasonr(temp_k)
        k_list.append(temp_k)
        pearsonr_list.append(temp_pearsonr)

    plt.plot(k_list, pearsonr_list)
    plt.xlabel('K')
    plt.ylabel('pearsonr correlation')
    plt.title('humidity-k')
    plt.show()

    idx = np.argmax(np.abs(pearsonr_list))
    print('最大(绝对)相关性', pearsonr_list[idx])
    print('最大相关性对应k', k_list[idx])

    """
    # ## k小时风向一致性

    # In[318]:


    from itertools import combinations

    k = 182

    def angle_minute_drift(alpha, beta):
        if max(alpha, beta)-min(alpha, beta)<180:
            result = max(alpha, beta)-min(alpha, beta)
        else:
            result = min(alpha, beta)-max(alpha, beta)+360
        return result

    def drift_direction(pressure_k):
        combine_list = list(combinations(pressure_k, 2))
        return np.array([angle_minute_drift(i, j) for i,j in combine_list])

    def stat_k(pressure_k):
        #pressure_k = drift_direction(pressure_k)
        temp = np.var(pressure_k)
        return temp

    pressure_k = []
    temp_data = data2.iloc[:,12].values
    print('total number', len(temp_data))
    for i in range(len(temp_data)-k+1):
        pressure_k.append(temp_data[i:i+k])
    pressure_k = np.array(pressure_k)
    print('data.shape', pressure_k.shape)  
    print('total number', len(pressure_k))  
    pressure_k = [stat_k(inst) for inst in pressure_k]
    fkdeplot(pressure_k, 'pressure-%d'%k)
    fplot(pressure_k[:show_num], 'pressure-%d'%k)
    high_level_data.append(pressure_k[warm_up-k:])


    # In[319]:


    from scipy.stats import pearsonr

    def pressure_k_peasonr(k):
        pressure_k = []
        temp_data = data2.iloc[:,12].values
        #print('total number', len(temp_data))
        for i in range(len(temp_data)-k+1):
            pressure_k.append(temp_data[i:i+k])
        pressure_k = np.array(pressure_k)
        #print('data.shape', pressure_k.shape)  
        #print('total number', len(pressure_k))  
        pressure_k = [stat_k(inst) for inst in pressure_k]
        #print(len(AQI[k-1:]), len(pressure_k))
        temp_pearsonr = pearsonr(AQI[k-1:],pressure_k)[0]
        #print(temp_pearsonr)
        return temp_pearsonr

    k_list = []
    pearsonr_list = []
    for k in range(24):
        temp_k = k*10+2
        temp_pearsonr = pressure_k_peasonr(temp_k)
        k_list.append(temp_k)
        pearsonr_list.append(temp_pearsonr)

    plt.plot(k_list, pearsonr_list)
    plt.xlabel('K')
    plt.ylabel('pearsonr correlation')
    plt.title('wind-direction-k')
    plt.show()

    idx = np.argmax(np.abs(pearsonr_list))
    print('最大(绝对)相关性',pearsonr_list[idx])
    print('最大相关性对应k',k_list[idx])

    """
    # # SWI静稳天气指数

    # ## 数据准备，得按照刚刚的结果抽取高级气象条件

    high_level_data.append(AQI[warm_up - 1:])
    high_level_data = np.array(high_level_data)
    high_level_data.shape

    high_level_data = pd.DataFrame(high_level_data).T
    high_level_data.columns = ['temperature', 'pressure', 'humidity', 'wind_speed', 'aqi']
    high_level_data

    # # 高级气象条件与AQI相关性

    corr = high_level_data.corr()
    corr.to_csv('corr2.csv')
    corr

    corr.columns = ['temperature', 'pressure', 'humidity', 'wind_speed', 'aqi']
    corr.index = corr.columns
    sns.heatmap(corr, annot=True)

    plt.scatter(x=high_level_data['pressure'], y=high_level_data['aqi'])
    plt.ylabel('AQI')
    plt.xlabel('Pressure')
    plt.show()

    plt.scatter(x=high_level_data['wind_speed'], y=high_level_data['aqi'])
    plt.ylabel('AQI')
    plt.xlabel('Wind Speed')
    plt.show()

    # ## 归一化

    from sklearn.preprocessing import MinMaxScaler

    y = high_level_data.iloc[:, -1]
    x = high_level_data.iloc[:, :-1]
    #print('1111')
    #print(y)
    #print(x)
    # minmaxscaler
    mm_x = MinMaxScaler()
    mm_y = MinMaxScaler()

    x = mm_x.fit_transform(x)
    y = mm_y.fit_transform(np.reshape(y.values, (-1, 1)))
    return x,y
    # ## 划分训练集测试集

    # In[327]:

   cut = 20
    x_train, y_train, x_test, y_test = x[:-cut], y[:-cut], x[-cut:], y[-cut:]
    print('!!!')
    print(x_train)
    print('@@@@@')
    print(y_train)
    print('x_train.shape', x_train.shape)
    print('x_test.shape', x_test.shape)
    print('y_train.shape', y_train.shape)
    print('y_test.shape', y_test.shape)

    # ## 线性回归

    from sklearn.linear_model import LinearRegression
    clf = LinearRegression()
    clf.fit(x_train, y_train)

    clf.coef_

    clf.intercept_

    plt.figure(figsize=(6,4))
    plt.bar(['Pressure','Temperature','Wind Speed','Humidity','Wind Direction'],clf.coef_[0,:])
    plt.title('coef')
    plt.show()

    from sklearn.metrics import mean_squared_error
    from sklearn.metrics import mean_absolute_error
    from sklearn.metrics import r2_score
    from math import sqrt

    def mape(y_true, y_pred):
        n = len(y_true)
        mape = sum(np.abs((y_true - y_pred) / y_true)) / n * 100
        return mape

    def easy_result(y_train, y_train_predict, train_index, model_index, show_num):
        # 画图进行展示
        y_train_predict = np.reshape(y_train_predict, (-1, 1))
        y_train_predict = mm_y.inverse_transform(y_train_predict)
        y_train_predict = y_train_predict[:, 0]
        y_train = np.reshape(y_train, (-1, 1))
        y_train = mm_y.inverse_transform(y_train)
        y_train = y_train[:, 0]

        plt.figure(figsize=(10, 2))
        plt.plot(y_train[-show_num:])
        plt.plot(y_train_predict[-show_num:])
        plt.legend(('real', 'predict'), fontsize='15')
        plt.title("%s Data" % train_index, fontsize='20')  # 添加标题
        plt.show()
        print('\n')

        plot_begin, plot_end = min(min(y_train), min(y_train_predict)), max(max(y_train), max(y_train_predict))
        plot_x = np.linspace(plot_begin, plot_end, 10)
        plt.figure(figsize=(5, 5))
        plt.plot(plot_x, plot_x)
        plt.plot(y_train, y_train_predict, 'o')
        plt.title("%s Data" % train_index, fontsize='20')  # 添加标题
        plt.show()

        # 输出结果
        print('%s上的MAE/RMSE/MAPE/R^2' % train_index)
        print(mean_absolute_error(y_train, y_train_predict))
        print(np.sqrt(mean_squared_error(y_train, y_train_predict)))
        print(mape(y_train, y_train_predict))
        print(r2_score(y_train, y_train_predict))

    y_train_predict = clf.predict(x_train)  # 预测结果
    easy_result(y_train, y_train_predict, 'Train', 'Linear Model', 100)  # 输出评价指标
    y_test_predict = clf.predict(x_test)  # 预测结果
    easy_result(y_test, y_test_predict, 'Test', 'Linear Model', 100)  # 输出评价指标
    # ## 岭回归 为了避免多重共线
    from sklearn.linear_model import Ridge
    clf = Ridge(alpha=0.1)
    clf.fit(x_train, y_train)
    plt.bar(['Pressure', 'Temperature', 'Wind Speed', 'Humidity', 'Wind Direction'], clf.coef_[0, :])
    plt.title('coef')
    plt.show()
    clf.coef_
    clf.intercept_
    y_train_predict = clf.predict(x_train)  # 预测结果
    easy_result(y_train, y_train_predict, 'Train', 'Linear Model', 100)  # 输出评价指标
    y_test_predict = clf.predict(x_test)  # 预测结果
    easy_result(y_test, y_test_predict, 'Test', 'Linear Model', 100)  # 输出评价指标
    # ## SWI静稳天气指数
    yy = clf.predict(x)
    yy = np.reshape(yy, (-1, 1))
    # y= mm_y.inverse_transform(y)
    # minmaxscaler
    mm_yy = MinMaxScaler()
    yy = mm_yy.fit_transform(np.reshape(yy, (-1, 1)))
    yy = yy[:, 0]
    fkdeplot(yy, 'SWI')

  # # 对SWI指数进行聚类得到天气类别
    # ### GMM
    from sklearn.mixture import GaussianMixture as GMM

    c = 3
    temp = yy.reshape(-1, 1)
    gmm = GMM(n_components=c).fit(temp)
    labels = gmm.predict(temp)
    plt.scatter(temp[:, 0], temp[:, 0], c=labels, s=50, cmap='viridis')

    result = pd.DataFrame(np.array([temp[:, 0], labels]).T)
    result

    # ## 分界点

    result.groupby(1).describe().to_csv('gmm_bound.csv')
    result.groupby(1).describe()

    df0, df1, df2 = result.groupby(1)

    # fkdeplot(df0[-1][0], '1')
    # fkdeplot(df1[-1][0], '2')
    # fkdeplot(df2[-1][0], '-1')
    sns.kdeplot(df0[-1][0])
    sns.kdeplot(df1[-1][0])
    sns.kdeplot(df2[-1][0])
    plt.title('distribution', fontsize=12)
    plt.show()
    # ### KMeans

    from sklearn.cluster import KMeans

    c = 3
    temp = np.array([yy, yy]).T
    kmeans_model = KMeans(n_clusters=c, random_state=1).fit(temp)
    labels = kmeans_model.labels_
    plt.scatter(temp[:, 0], temp[:, 1], c=labels, s=50, cmap='viridis')

    # ## Case Study

    show_num = 100
    plt.figure(figsize=(10, 2))
    plt.plot(yy[-show_num:], label='SWI')
    plt.plot(y[-show_num:, 0], label='AQI')
    plt.axhline(0.42, linestyle="dotted", linewidth=1, color='r')
    plt.axhline(0.60, linestyle="dotted", linewidth=1, color='r')
    plt.legend()
    plt.show()

    show_num = 0
    plt.figure(figsize=(10, 2))
    plt.plot(yy[-show_num:], label='SWI')
    plt.axhline(0.42, linestyle="dotted", linewidth=1, color='r')
    plt.axhline(0.60, linestyle="dotted", linewidth=1, color='r')
    plt.legend()
    plt.show()

    show_num = 1000
    plt.figure(figsize=(10, 2))
    plt.plot(yy[-show_num:], label='SWI')
    plt.axhline(0.42, linestyle="dotted", linewidth=1, color='r')
    plt.axhline(0.60, linestyle="dotted", linewidth=1, color='r')
    plt.legend()
    plt.show()

    # # 各个因子分类别统计

    result_stat = high_level_data.iloc[:, :-1]
    result_stat['classes'] = result[1]
    result_stat

    result_stat.groupby('classes').describe().to_csv('factor_stat.csv')
    result_stat.groupby('classes').describe()

    temp1, temp2, temp3 = result_stat.groupby('classes')

    sns.kdeplot(temp1[-1]['Wind Speed'])
    sns.kdeplot(temp2[-1]['Wind Speed'])
    sns.kdeplot(temp3[-1]['Wind Speed'])
    plt.title('Wind Speed Distribution', fontsize=12)
    plt.show()

    sns.kdeplot(temp1[-1]['Humidity'])
    sns.kdeplot(temp2[-1]['Humidity'])
    sns.kdeplot(temp3[-1]['Humidity'])
    plt.title('Humidity Distribution', fontsize=12)
    plt.show()

    sns.kdeplot(temp1[-1]['Temperature'])
    sns.kdeplot(temp2[-1]['Temperature'])
    sns.kdeplot(temp3[-1]['Temperature'])
    plt.title('Temperature Distribution', fontsize=12)
    plt.show()

    sns.kdeplot(temp1[-1]['Pressure'])
    sns.kdeplot(temp2[-1]['Pressure'])
    sns.kdeplot(temp3[-1]['Pressure'])
    plt.title('Pressure Distribution', fontsize=12)
    plt.show()
    """