Python实现主成分分析法(PCA)+粒子群算法(PSO)+极限学习机(ELM)的时间序列组合预测模型

1.引入数据

import csv
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from datetime import datetime 
from sklearn.metrics import explained_variance_score
from sklearn.svm import SVR 
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import explained_variance_score
from sklearn import metrics
import random
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error # 平方绝对误差

2.数据处理

# --- Load features, target and timestamps from the two CSV files ---

# timestamp strings (parsed to datetime later)
time=[]
# feature rows: columns 1..10 of the humidity file
feature=[]
# target rows: column 0 of the humidity file
target=[]

# Use context managers so the file handles are closed deterministically;
# the original passed open(...) straight to csv.reader and leaked them.
with open('新疆羊圈湿度.csv', newline='') as f:
    for content in csv.reader(f):
        if content:  # skip blank lines
            row = list(map(float, content))
            feature.append(row[1:11])
            target.append(row[0:1])

with open('新疆羊圈时间.csv', newline='') as f:
    for content in csv.reader(f):
        if content:
            # csv already yields strings; no str() cast needed
            time.append(content)

# Flatten the one-element target rows into a plain list of floats
targets = [row[0] for row in target]

# Rows are stored newest-first in the CSV; reverse to chronological order
feature.reverse()
targets.reverse()



# --- Standardize the features and parse the timestamps ---

# Standardize each feature column to zero mean / unit variance
scaler = StandardScaler()
# Fit on the data, then transform it
scaler.fit(feature)
feature = scaler.transform(feature)

# Parse timestamp strings into datetime objects.
# Each entry of `time` is a one-element CSV row like ['2020/1/1 0:00'].
time_rel = []
for row in time:  # original used enumerate() but never used the index
    time_rel.append(datetime.strptime(row[0], '%Y/%m/%d %H:%M'))
# Timestamps are stored newest-first; reverse to chronological order
time_rel.reverse()

# Plot the raw target series
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title('TRUE')  # fixed typo: was 'TURE'
plt.plot(time_rel, targets)
plt.xlabel('Time')
plt.ylabel('Value')

数据长这样

 3.PCA降维分析

# --- PCA: how much variance does each principal component explain? ---
pca = PCA(n_components=7)
newfeature = pca.fit_transform(feature)

# Explained-variance ratio per component, rounded for display;
# computed once and reused for both the plot and the print below.
y_data = np.around(pca.explained_variance_ratio_, 3)
# Component labels derived from the fitted model instead of hard-coding
# the list ['1'..'7'] separately from n_components.
x_data = [str(i + 1) for i in range(len(y_data))]

# Bar chart of the contribution rate of each component
plt.bar(x=x_data, height=y_data, color='steelblue', alpha=0.8)
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title("Contribution rate of each principal component")
# Axis labels
plt.xlabel("Principal Component")
plt.ylabel("Contribution rate of each principal component/%")
print(y_data)

[0.4   0.289 0.122 0.089 0.042 0.033 0.026]

4.降至5维

# Reduce the standardized features to the 5 strongest principal components
pca_model = PCA(n_components=5)
newfeature = pca_model.fit_transform(feature)

 5.ELM预测

class HiddenLayer:
    """Single-hidden-layer extreme learning machine (ELM).

    Input weights `w` and biases `b` are drawn at random with a fixed seed
    and never trained; only the output weights `beta` are solved for in
    closed form via a ridge-regularized least-squares fit.
    """

    def __init__(self, x, num):
        """x: 2-D input matrix (samples x features); num: hidden neurons."""
        columns = x.shape[1]
        rnd = np.random.RandomState(9999)  # fixed seed -> reproducible layer
        # Input-to-hidden weights, uniform in [-1, 1)
        self.w = rnd.uniform(-1, 1, (columns, num))
        # One bias per hidden neuron, uniform in [-0.4, 0.4), stored as a
        # (1, num) row that broadcasts over any number of samples.  The
        # original tiled the bias to the *training* row count, which broke
        # prediction on inputs with more rows than the training set.  Drawing
        # the array consumes the same RandomState stream as the original's
        # num sequential scalar draws, so the values are identical.
        self.b = rnd.uniform(-0.4, 0.4, (1, num))
        # Hidden-layer output matrix H for the training data
        self.h = self.sigmoid(np.dot(x, self.w) + self.b)
        self.H_ = np.linalg.pinv(self.h)  # Moore-Penrose pseudo-inverse of H

    def sigmoid(self, x):
        """Activation g(x); must be infinitely differentiable for ELM theory."""
        return 1.0 / (1 + np.exp(-x))

    def regressor_train(self, T):
        """Solve the output weights beta for regression targets T.

        beta = (H^T H + I/C)^-1 H^T T  (ridge-regularized least squares).
        Bug fix: the original added the *scalar* len(T)/C to every entry of
        H^T H; the ELM formulation calls for the identity matrix I instead.
        """
        C = 2  # regularization constant
        I = np.eye(self.h.shape[1])
        sub_former = np.dot(np.transpose(self.h), self.h) + I / C
        all_m = np.dot(np.linalg.pinv(sub_former), np.transpose(self.h))
        T = T.reshape(-1, 1)
        self.beta = np.dot(all_m, T)
        return self.beta

    def classifisor_train(self, T):
        """Solve the output weights beta for classification labels T.

        Labels are one-hot encoded first; otherwise identical to the
        regression solve (with its own regularization constant).
        """
        # Local import: OneHotEncoder was used but never imported at file level.
        from sklearn.preprocessing import OneHotEncoder
        en_one = OneHotEncoder()
        # toarray(): the encoder returns a sparse matrix
        T = en_one.fit_transform(T.reshape(-1, 1)).toarray()
        C = 3
        I = np.eye(self.h.shape[1])  # identity, not the scalar len(T) (bug fix)
        sub_former = np.dot(np.transpose(self.h), self.h) + I / C
        all_m = np.dot(np.linalg.pinv(sub_former), np.transpose(self.h))
        self.beta = np.dot(all_m, T)
        return self.beta

    def regressor_test(self, test_x):
        """Predict continuous outputs for test_x (any number of rows)."""
        h = self.sigmoid(np.dot(test_x, self.w) + self.b)
        return np.dot(h, self.beta)

    def classifisor_test(self, test_x):
        """Predict class indices (argmax over the one-hot outputs) for test_x."""
        h = self.sigmoid(np.dot(test_x, self.w) + self.b)
        result = np.dot(h, self.beta)
        return [row.tolist().index(max(row.tolist())) for row in result]
    
# Random 90/10 split: only target_train/feature_train are actually kept.
feature_train,feature_test,target_train,target_test = train_test_split(newfeature,targets,test_size=0.1,random_state=30)

#feature_train=feature[0:int(len(feature)*0.95)]
# The random test split above is immediately discarded: the test set is
# re-taken as the chronologically last 10% of the series instead.
# NOTE(review): feature_train comes from a *random* split over ALL samples,
# so it overlaps this last-10% test slice — this looks like train/test
# leakage; confirm whether that is intended.
feature_test=newfeature[int(len(newfeature)*0.9):int(len(newfeature))]
#target_train=targets[0:int(len(targets)*0.95)]
target_test=targets[int(len(targets)*0.9):int(len(targets))]
label_time=time_rel[int(len(time_rel)*0.9):int(len(time_rel))]
target_train=np.array(target_train)

# Train a 100-hidden-neuron ELM and predict the held-out slice.
a = HiddenLayer(feature_train,100)
a.regressor_train(target_train)
result = a.regressor_test(feature_test)

# Plot predictions against the true series over the test period.
plt.plot(label_time,result)# predictions on the test slice
plt.plot(label_time,target_test)# true values on the test slice
plt.legend(['ELM','TRUE'])
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title("ELM")  # title
plt.xlabel('Time')
plt.ylabel('Value')
plt.show()
print("MSE:",mean_squared_error(target_test,result))
print("R2 = ",metrics.r2_score(target_test,result)) # R2
print("MAE = ",mean_absolute_error(target_test,result)) # MAE (comment fixed: was mislabeled R2)

MSE: 9.043180210662658
R2 =  0.9046692469922495
MAE =  2.345742971612385

6.PSO优化ELM参数(优化效果有限,本质上是在随机搜索中碰运气)

class PSO:
    """Particle swarm optimizer for the ELM hidden-neuron count.

    parameters: a list [NGEN, pop_size, lower_bounds, upper_bounds] where
    the two bound lists have one entry per optimized variable.
    Relies on the module-level feature_train/target_train/feature_test/
    target_test split and the HiddenLayer class.
    """

    def __init__(self, parameters):
        self.NGEN = parameters[0]          # number of generations
        self.pop_size = parameters[1]      # number of particles
        self.var_num = len(parameters[2])  # number of optimized variables
        self.bound = []                    # [lower_bounds, upper_bounds]
        self.bound.append(parameters[2])
        self.bound.append(parameters[3])

        self.pop_x = np.zeros((self.pop_size, self.var_num))    # particle positions
        self.pop_v = np.zeros((self.pop_size, self.var_num))    # particle velocities
        self.p_best = np.zeros((self.pop_size, self.var_num))   # per-particle best positions
        self.g_best = np.zeros((1, self.var_num))                # global best position

        # Initialize generation 0 and find its global best.
        temp = -1
        for i in range(self.pop_size):
            for j in range(self.var_num):
                self.pop_x[i][j] = random.uniform(self.bound[0][j], self.bound[1][j])
                self.pop_v[i][j] = random.uniform(0, 1)
            self.p_best[i] = self.pop_x[i]  # each particle starts at its own best
            fit = self.fitness(self.p_best[i])
            if fit > temp:
                # .copy() is essential: the original stored the row *view*
                # self.p_best[i], so g_best silently changed whenever that
                # particle's best row was later overwritten.
                self.g_best = self.p_best[i].copy()
                temp = fit

    def fitness(self, ind_var):
        """Fitness of one particle: R^2 of an ELM with ind_var[0] hidden neurons."""
        x1 = ind_var[0]
        # Round to an integer neuron count and clamp to >= 1.  The original
        # only special-cased x1 == 0 exactly, so any 0 < x1 < 0.5 still
        # rounded down to a 0-neuron network.
        num = int(round(x1))
        if num < 1:
            num = 1
        print(num)
        if x1 == 2000:
            # Pre-computed value for the upper bound; avoids retraining the
            # most expensive (2000-neuron) network every time it is probed.
            print("R2 = 0.9614169669516667")
            return 0.9614169669516667
        else:
            # Train an ELM on the module-level split and score it on the test set.
            a = HiddenLayer(feature_train, num)
            a.regressor_train(target_train)
            predictval = a.regressor_test(feature_test)
            r2 = metrics.r2_score(target_test, predictval)
            print("R2 = ", r2)
            return r2

    def update_operator(self, pop_size):
        """One PSO step: update each particle's velocity, position and bests."""
        c1 = 2     # cognitive (personal-best) learning factor
        c2 = 2     # social (global-best) learning factor
        w = 0.4    # inertia weight
        for i in range(pop_size):
            # Velocity update.
            self.pop_v[i] = w * self.pop_v[i] + c1 * random.uniform(0, 1) * (
                        self.p_best[i] - self.pop_x[i]) + c2 * random.uniform(0, 1) * (self.g_best - self.pop_x[i])
            # Position update.
            self.pop_x[i] = self.pop_x[i] + self.pop_v[i]
            # Clamp back into the search bounds.
            for j in range(self.var_num):
                if self.pop_x[i][j] < self.bound[0][j]:
                    self.pop_x[i][j] = self.bound[0][j]
                if self.pop_x[i][j] > self.bound[1][j]:
                    self.pop_x[i][j] = self.bound[1][j]
            # Evaluate the new position ONCE and reuse the value: every
            # fitness call trains a full ELM, and the original evaluated the
            # same position up to four times per particle.
            fit_x = self.fitness(self.pop_x[i])
            if fit_x > self.fitness(self.p_best[i]):
                self.p_best[i] = self.pop_x[i]
            if fit_x > self.fitness(self.g_best):
                # Copy: assigning the row view itself would let g_best keep
                # drifting as this particle moves in later steps (see __init__).
                self.g_best = self.pop_x[i].copy()

    def main(self):
        """Run NGEN generations, track the all-time best, plot fitness history."""
        popobj = []
        self.ng_best = np.zeros((1, self.var_num))[0]  # best position ever seen
        ng_best_fit = self.fitness(self.ng_best)       # cache its fitness
        for gen in range(self.NGEN):
            self.update_operator(self.pop_size)
            # Evaluate the generation's best once and reuse the value (the
            # original re-trained the same deterministic ELM up to four times).
            g_fit = self.fitness(self.g_best)
            popobj.append(g_fit)
            print('############ Generation {} ############'.format(str(gen + 1)))
            if g_fit > ng_best_fit:
                self.ng_best = self.g_best.copy()
                ng_best_fit = g_fit
            print('最好的位置:{}'.format(self.ng_best))
            print('最大的函数值:{}'.format(ng_best_fit))
        print("---- End of (successful) Searching ----")

        # Plot best fitness per generation.
        plt.figure()
        fig = plt.gcf()
        fig.set_size_inches(18.5, 10.5)
        plt.title("Figure1")
        plt.xlabel("iterators", size=14)
        plt.ylabel("fitness", size=14)
        t = [t for t in range(self.NGEN)]
        plt.plot(t, popobj, color='b', linewidth=2)
        plt.show()
if __name__ == '__main__':
    # Search bounds for the single tuned variable (hidden-neuron count).
    lower_bounds = [0]
    upper_bounds = [2000]
    # [generations, swarm size, lower bounds, upper bounds]
    swarm = PSO([10, 10, lower_bounds, upper_bounds])
    swarm.main()

7.代入参数

# Re-train the ELM with the PSO-selected hidden-neuron count (1933)
# and evaluate it on the same held-out slice as before.
a = HiddenLayer(feature_train,1933)
a.regressor_train(target_train)
result = a.regressor_test(feature_test)

# Plot predictions against the true series over the test period.
plt.plot(label_time,result)# predictions on the test slice
plt.plot(label_time,target_test)# true values on the test slice
plt.legend(['ELM','TRUE'])
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title("ELM")  # title
plt.xlabel('Time')
plt.ylabel('Value')
plt.show()
print("MSE:",mean_squared_error(target_test,result))
print("R2 = ",metrics.r2_score(target_test,result)) # R2

MSE: 6.8359109962212745
R2 =  0.9401018306505065 
  • 6
    点赞
  • 75
    收藏
    觉得还不错? 一键收藏
  • 9
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 9
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值