系统配置
python3.7.9
tensorflow1.14
numpy1.16.5(没找到包,直接pip install numpy==1.16.5了)
清华源tensorflow包(清华大学PyPI镜像)
tensorflow-1.14.0-cp37-cp37m-win_amd64.whl (pip install tensorflow-1.14.0-cp37-cp37m-win_amd64.whl)
tensorflow安装
数据
算法总结
1.数据处理部分
# -*- coding=utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import csv

# --- Load the raw data (aggregate load + single-user load) ---
p = r'D:\LoadPred\train.csv'
with open(p, encoding='utf-8') as f:
    # skiprows=1 drops the CSV header row
    data = np.loadtxt(f, float, delimiter=",", skiprows=1)

p = r'D:\LoadPred\train_single.csv'
with open(p, encoding='utf-8') as f:
    data_csv = np.loadtxt(f, float, delimiter=",", skiprows=1)

print(data)
print(data_csv)

# --- Column extraction (column layout inferred from slice names:
# season, month, week, day, hour, temperature, avg_temp, all_load, per-user loads) ---
dataSeason = data[0:, :1]
dataMonth = data[0:, 1:2]
dataWeek = data[0:, 2:3]
dataDay = data[0:, 3:4]
dataHour = data[0:, 4:5]
dataTemperature = data[0:, 5:6]
dataAvgTemp = data[0:, 6:7]
dataAllLoad = data[0:, 7:8]
dataAllUserLoad = data[0:, 8:]
print(dataAllUserLoad)

# Features = first 7 columns, target = total load
x = data[0:, :7]
y = data[0:, 7:8]
df = pd.DataFrame(x, columns=['season', 'month', 'week', 'wk_day', 'hour', 'temperature', 'avg_temp'])
df['Target'] = pd.DataFrame(y, columns=['all_load'])
df.head()

plt.figure(figsize=(8, 8))
# Heatmap of the pairwise correlation coefficients between features and target
p = sns.heatmap(df.corr(), annot=True, square=True)

# Pairwise scatter plots of the single-user file.
# BUG FIX: the original passed `float` as read_csv's positional `sep`
# argument alongside delimiter="," which raises a TypeError.
p = r'D:\LoadPred\train_single.csv'
with open(p, encoding='utf-8') as f:
    data_csv = pd.read_csv(f, delimiter=",", skiprows=0)
sns.pairplot(data_csv)
2.K-Means聚类
# Load the 300-user consumption file
# (original note: files live under D:\Program Files (x86)\Python37\Scripts)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

data = np.genfromtxt("./LoadPred/train_300usr.csv", delimiter=',')
print(data)
# Drop the header row and the index column: rows are hourly readings,
# columns are the 300 users (original note says 7896 rows).
data_300usr = data[1:, 1:]
print(data_300usr.shape)

# For every user, accumulate consumption into 24 hour-of-day bins:
# rows i, i+24, i+48, ... all correspond to hour-of-day i.
data_300usr_24 = [[0 for i in range(24)] for j in range(300)]
for k in range(300):
    for i in range(24):
        total = 0.0  # renamed from `sum`, which shadowed the builtin
        for j in range(i, data_300usr.shape[0], 24):
            total += data_300usr[j][k]
        data_300usr_24[k][i] = total
data_300usr_24 = np.array(data_300usr_24)
print(data_300usr_24.shape)

# Persist the (300, 24) aggregate, then read it back as a DataFrame
np.savetxt("result1.csv", data_300usr_24, delimiter=',')
data_csv = pd.read_csv("./result1.csv", header=None)
# df = pd.DataFrame(data_csv)
# df.head()
data_csv  # notebook-style display

# Plot every user's 24-hour profile (one curve per user)
fig, ax = plt.subplots(figsize=(12, 6))
plt.plot(data_csv.T)
plt.show()
#聚类代码
from sklearn.cluster import KMeans
class EnergyFingerPrints():
    """K-means clustering of per-user load profiles.

    `data` is expected to be a 2-D array: one row per user,
    one column per time bin (here 24 hour-of-day sums).
    """

    def __init__(self, data):
        # Will contain the centroid (mean profile) of each cluster,
        # filled in by plot().
        self.means = []
        self.data = data

    def elbow_method(self, n_clusters):
        """Performs elbow method for a predefined number
        of clusters.

        Parameters
        ----------
        n_clusters : int
            the number of clusters to perform the elbow method

        Returns
        ---------
        A plot of the elbow method
        """
        fig, ax = plt.subplots(figsize=(8, 4))
        distortions = []
        for i in range(1, n_clusters):
            km = KMeans(n_clusters=i,
                        init='k-means++',  # fast-converging centroid seeding
                        n_init=10,         # number of seeding restarts
                        max_iter=300,      # iterations per run
                        random_state=0)    # reproducible seeding
            km.fit(self.data)
            # inertia_ = sum of distances of samples to their nearest centroid
            distortions.append(km.inertia_)
        plt.plot(range(1, n_clusters), distortions, marker='o', lw=1)
        plt.xlabel('聚类数量')
        plt.ylabel('至中心点距离之和')
        plt.show()

    def get_cluster_counts(self):
        # NOTE: requires fit() to have been called first (uses self.predictions).
        return pd.Series(self.predictions).value_counts()

    def labels(self, n_clusters):
        # NOTE(review): this refits a *fresh* KMeans with different parameters
        # than fit(), so these labels are not guaranteed to match
        # self.predictions — confirm this is intended.
        self.n_clusters = n_clusters
        return KMeans(self.n_clusters, init='k-means++', n_init=10,
                      max_iter=300, random_state=0).fit(self.data).labels_

    def fit(self, n_clusters):
        """Performs K-means clustering for the load-profiles

        Parameters
        ----------
        n_clusters : int

        Returns
        --------
        count_dict : dict
            The number of load-profiles in each cluster
        """
        self.n_clusters = n_clusters
        self.kmeans = KMeans(self.n_clusters)
        self.predictions = self.kmeans.fit_predict(self.data)

    def plot(self):
        """Plots all loads in each cluster, plus each cluster's mean profile."""
        self.cluster_names = [str(x) for x in range(self.n_clusters)]
        fig, ax = plt.subplots(figsize=(12, 16))
        for i in range(0, self.n_clusters):
            all_data = []
            # One subplot row per cluster (was hard-coded to 4 rows).
            plt.subplot(self.n_clusters, 1, i + 1)
            for x, y in zip(self.data, self.predictions):
                if y == i:
                    all_data.append(x)
                    # Draw every member curve; low alpha so overlaps show
                    # density (the pasted original only plotted the last one).
                    plt.plot(x, alpha=0.06, color="blue", lw=2)
            # plt.ylim(0,4)
            plt.xlim(0, 96)  # NOTE(review): data has 24 bins — confirm this limit
            plt.title('Cluster%s' % (i + 1))
            plt.ylabel('用电量/kW')
            all_data_array = np.array(all_data)
            mean = all_data_array.mean(axis=0)
            self.means.append(mean)
            plt.plot(mean, color="black", linewidth=4)
        plt.show()

    def plot_energy_fingerprints(self):
        """Plots the mean of each cluster in a single plot."""
        fig, ax = plt.subplots(figsize=(8, 5))
        for i, item in enumerate(self.means):
            plt.plot(item, label="cluster %s" % (str(i + 1)))
        plt.xlim(0, 96)
        plt.ylabel('用电量/kW')
        plt.xticks([0, 20, 40, 60, 80],
                   ['00:00', '05:00', '10:00', '15:00', '20:00'], rotation=60)
        plt.grid()
        plt.legend()
        plt.show()
# Cluster design: run the elbow method to pick the number of clusters
load_data=np.array(data_csv)
energy_clusters = EnergyFingerPrints(load_data)
energy_clusters.elbow_method(n_clusters=13)
分为4类
# Cluster the 300 users into 4 groups (chosen from the elbow plot)
energy_clusters.fit(n_clusters=4)
# Size of each cluster (notebook-style display expression)
energy_clusters.get_cluster_counts()
# Cluster label of each of the 300 users
group = energy_clusters.labels(n_clusters=4)
print(group)

num = data_csv.index
print(num)
# Build a user_id -> cluster lookup table
cls = pd.DataFrame(list(num))
cls['cluster'] = list(group)
cls.columns = ['user_id', 'cluster']
# Sorting by cluster groups each cluster's user ids together
cls = cls.sort_values(by='cluster', ascending=True)
# BUG FIX: reset_index returns a new DataFrame; the original discarded it
cls = cls.reset_index(drop=True)
# User ids belonging to clusters 1..4 (notebook-style display expressions)
np.array(cls.loc[cls.cluster == 0].user_id)
np.array(cls.loc[cls.cluster == 1].user_id)
np.array(cls.loc[cls.cluster == 2].user_id)
np.array(cls.loc[cls.cluster == 3].user_id)
3.回归决策树、分类决策树
# Temperature forecasting with a regression decision tree
from sklearn import tree

model = tree.DecisionTreeRegressor()
model.fit(x_train_wd, y_train_temperature)
answer_temperature = model.predict(x_test)
# predict() already returns a numeric ndarray, so the original chain of
# pd.to_numeric + numpy.mat (via `from numpy import *`) was redundant.
# reshape(-1, 1) makes a column vector for any test-set size
# (the original hard-coded 864 rows).
answer_temperature = np.asarray(answer_temperature, dtype=float).reshape(-1, 1)
print(answer_temperature)
np.savetxt("answer_temperature_bp.csv", answer_temperature, delimiter=',')
# Classification decision tree: prediction for cluster-2 users
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
import pydotplus

# Train with entropy as the split criterion.
# NOTE(review): max_depth=40 with splitter='random' is very permissive;
# the original comments already observed overfitting.
clf = tree.DecisionTreeClassifier(criterion='entropy', splitter='random',
                                  max_depth=40, min_samples_split=2)
print(clf)
clf.fit(x_train_1, y_train_1)
# feature_importances_: larger value => feature contributes more to the splits
print(clf.feature_importances_)
# Predict on the test data (low accuracy observed => overfitting)
y_pred_1 = clf.predict(x_test_2)  # labels come back as strings
import pandas as pd
# Convert string labels to numbers, then reshape to a column vector.
# (The original used deprecated numpy.mat via `from numpy import *`
# and hard-coded 864 rows; reshape(-1, 1) handles any test-set size.)
y_pred_1 = np.asarray(pd.to_numeric(y_pred_1)).reshape(-1, 1)
print(y_pred_1)
np.savetxt("y_pred_1.csv", y_pred_1, delimiter=',')
4.BP NN
# BP (feed-forward) neural network: prediction for cluster 1, 3 and 4 users
import tensorflow as tf

model = tf.keras.Sequential()
# Hidden layer: 20 units over the 6 input features, ReLU activation
model.add(tf.keras.layers.Dense(20, input_shape=(6,), activation="relu"))
# Output layer: single value (load), ReLU keeps predictions non-negative
model.add(tf.keras.layers.Dense(1, activation="relu"))
model.compile(optimizer='adam',   # adam optimizer
              loss='mse',         # mean squared error loss
              metrics=['acc'])    # NOTE(review): 'acc' is meaningless for
                                  # regression — consider 'mae' instead
# batch_size=8: weights updated after every 8 samples; epochs = passes over data
history = model.fit(x_train_0, y_train_0, batch_size=8, epochs=50)  # fixed typo: histroy

# Predict and persist
y_pred_0 = model.predict(x_test_2)
print(y_pred_0)
np.savetxt("y_pred_0.csv", y_pred_0, delimiter=',')

# Plot the predictions.
# BUG FIX: the original plotted the undefined name `y_pred` (NameError).
import matplotlib.pyplot as plt
plt.plot(y_pred_0)
plt.show()