1.引入数据
import csv
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import explained_variance_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import explained_variance_score
from sklearn import metrics
import random
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error # 平方绝对误差
2.数据处理
# ---- 2. Data preprocessing ----
# time:    raw timestamp strings, one row per sample
# feature: columns 1..10 of the humidity CSV (model inputs)
# target:  column 0 of the humidity CSV (humidity value, 1-element lists at first)
time=[]
feature=[]
target=[]
# FIX: the original called csv.reader(open(...)) and never closed the files;
# context managers guarantee the handles are released.
with open('新疆羊圈湿度.csv') as humidity_file:
    for content in csv.reader(humidity_file):
        content = list(map(float, content))
        if len(content) != 0:
            feature.append(content[1:11])
            target.append(content[0:1])
with open('新疆羊圈时间.csv') as time_file:
    for content in csv.reader(time_file):
        content = list(map(str, content))
        if len(content) != 0:
            time.append(content)
# Flatten [[v], [v], ...] -> [v, v, ...] (comprehension replaces the append loop).
targets = [row[0] for row in target]
# Reverse into chronological order (the CSV is apparently stored newest-first —
# TODO confirm against the data files).
feature.reverse()
targets.reverse()
# Standardise features to zero mean / unit variance.
scaler = StandardScaler()
# Fit the scaler on the full feature matrix...
scaler.fit(feature)
# ...then transform it in place.
feature = scaler.transform(feature)
# Parse timestamp strings into datetime objects.
time_rel = [row[0] for row in time]
time_rel = [datetime.strptime(s, '%Y/%m/%d %H:%M') for s in time_rel]
time_rel.reverse()
# Plot the raw target series.
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title('TRUE')  # FIX: original title was the typo 'TURE'
plt.plot(time_rel, targets)
plt.xlabel('Time')
plt.ylabel('Value')
数据长这样
3.PCA降维分析
# ---- 3. PCA: contribution rate of each principal component ----
pca = PCA(n_components=7)
newfeature = pca.fit_transform(feature)

# Labels for the seven components and their rounded explained-variance ratios.
component_labels = ['1', '2', '3', '4', '5', '6', '7']
variance_ratios = np.around(pca.explained_variance_ratio_, 3)

# Bar chart of the contribution rate per component.
plt.bar(x=component_labels, height=variance_ratios, color='steelblue', alpha=0.8)
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title("Contribution rate of each principal component")
# Axis labels.
plt.xlabel("Principal Component")
plt.ylabel("Contribution rate of each principal component/%")
print(variance_ratios)
[0.4 0.289 0.122 0.089 0.042 0.033 0.026]
4.降至5维
# ---- 4. Reduce to 5 dimensions (chosen from the contribution-rate analysis) ----
pca_5d = PCA(n_components=5)
newfeature = pca_5d.fit_transform(feature)
5.ELM预测
class HiddenLayer:
    """Single-hidden-layer extreme learning machine (ELM).

    Input weights ``w`` and biases ``b`` are drawn once at random (from a
    seeded RandomState, so results are reproducible) and never trained;
    only the output weights ``beta`` are solved for in closed form.
    """

    def __init__(self, x, num):
        """x: input matrix (samples x features); num: hidden-neuron count."""
        row = x.shape[0]
        columns = x.shape[1]
        rnd = np.random.RandomState(9999)  # fixed seed -> reproducible weights
        # Input-to-hidden connection weights, uniform in [-1, 1).
        self.w = rnd.uniform(-1, 1, (columns, num))
        # One random bias per hidden neuron, replicated across all sample rows.
        self.b = np.zeros([row, num], dtype=float)
        for i in range(num):
            rand_b = rnd.uniform(-0.4, 0.4)  # random bias in [-0.4, 0.4)
            self.b[:, i] = rand_b            # same bias for every sample (vectorised fill)
        # Hidden-layer output matrix H.
        self.h = self.sigmoid(np.dot(x, self.w) + self.b)
        # Moore-Penrose pseudo-inverse of H.
        # NOTE(review): H_ is never used by the ridge-style training below;
        # kept for backward compatibility with external callers.
        self.H_ = np.linalg.pinv(self.h)

    def sigmoid(self, x):
        """Activation g(x); must be infinitely differentiable."""
        return 1.0 / (1 + np.exp(-x))

    def regressor_train(self, T):
        """Solve for output weights beta given a regression target vector T."""
        C = 2
        I = len(T)
        # NOTE(review): standard regularised ELM adds (1/C) * identity to
        # H^T H; here the SCALAR I/C (= n_samples / C) is added to every
        # entry instead.  Left as-is to preserve the published results —
        # confirm whether this was intentional.
        sub_former = np.dot(np.transpose(self.h), self.h) + I / C
        all_m = np.dot(np.linalg.pinv(sub_former), np.transpose(self.h))
        T = T.reshape(-1, 1)
        self.beta = np.dot(all_m, T)
        return self.beta

    def classifisor_train(self, T):
        """Solve for output weights beta given a classification label vector T."""
        # FIX: OneHotEncoder was used without being imported anywhere in the
        # file, so this method always raised NameError.  Import it locally.
        from sklearn.preprocessing import OneHotEncoder
        en_one = OneHotEncoder()
        # toarray() converts the sparse one-hot matrix to a dense array.
        T = en_one.fit_transform(T.reshape(-1, 1)).toarray()
        C = 3
        I = len(T)
        # NOTE(review): same scalar-vs-identity regularisation caveat as in
        # regressor_train.
        sub_former = np.dot(np.transpose(self.h), self.h) + I / C
        all_m = np.dot(np.linalg.pinv(sub_former), np.transpose(self.h))
        self.beta = np.dot(all_m, T)
        return self.beta

    def regressor_test(self, test_x):
        """Predict continuous outputs for test_x.

        NOTE(review): reuses the first rows of the training bias matrix, so
        test_x may not have more rows than the training input had.
        """
        b_row = test_x.shape[0]
        h = self.sigmoid(np.dot(test_x, self.w) + self.b[:b_row, :])
        result = np.dot(h, self.beta)
        return result

    def classifisor_test(self, test_x):
        """Predict class indices (argmax over one-hot scores) for test_x."""
        b_row = test_x.shape[0]
        h = self.sigmoid(np.dot(test_x, self.w) + self.b[:b_row, :])
        result = np.dot(h, self.beta)
        result = [item.tolist().index(max(item.tolist())) for item in result]
        return result
# ---- 5. ELM prediction ----
# Random 90/10 split; only feature_train/target_train are actually kept from it.
feature_train,feature_test,target_train,target_test = train_test_split(newfeature,targets,test_size=0.1,random_state=30)
#feature_train=feature[0:int(len(feature)*0.95)]
# Overwrite the random test split with the chronological last 10% of the data.
# NOTE(review): feature_train comes from a RANDOM split, so it can contain
# samples that also appear in this chronological test tail (data leakage);
# the scores printed below are therefore likely optimistic — confirm intent.
feature_test=newfeature[int(len(newfeature)*0.9):int(len(newfeature))]
#target_train=targets[0:int(len(targets)*0.95)]
target_test=targets[int(len(targets)*0.9):int(len(targets))]
# Matching timestamps for the test tail (x-axis of the plot below).
label_time=time_rel[int(len(time_rel)*0.9):int(len(time_rel))]
target_train=np.array(target_train)
# Train an ELM with 100 hidden neurons and predict the test tail.
a = HiddenLayer(feature_train,100)
a.regressor_train(target_train)
result = a.regressor_test(feature_test)
plt.plot(label_time,result)# predicted series (test set)
plt.plot(label_time,target_test)# ground-truth series (test set)
plt.legend(['ELM','TRUE'])
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title("ELM") # title
plt.xlabel('Time')
plt.ylabel('Value')
plt.show()
print("MSE:",mean_squared_error(target_test,result))
print("R2 = ",metrics.r2_score(target_test,result)) # R2
print("MAE = ",mean_absolute_error(target_test,result)) # MAE (original comment mislabelled this as R2)
MSE: 9.043180210662658
R2 =  0.9046692469922495
MAE =  2.345742971612385
6.PSO优化ELM参数(其实没什么卵用,碰运气而已)
class PSO:
    def __init__(self, parameters):
        """
        Particle swarm optimization.

        parameter: a list type, like [NGEN, pop_size, var_num_min, var_num_max]
        """
        # Initialisation
        self.NGEN = parameters[0]          # number of generations to run
        self.pop_size = parameters[1]      # swarm (population) size
        self.var_num = len(parameters[2])  # number of decision variables
        self.bound = []                    # variable bounds: [lower list, upper list]
        self.bound.append(parameters[2])
        self.bound.append(parameters[3])
        self.pop_x = np.zeros((self.pop_size, self.var_num))   # particle positions
        self.pop_v = np.zeros((self.pop_size, self.var_num))   # particle velocities
        self.p_best = np.zeros((self.pop_size, self.var_num))  # each particle's best position
        self.g_best = np.zeros((1, self.var_num))              # global best position
        # NOTE(review): g_best starts with shape (1, var_num) but is later
        # assigned 1-D rows (p_best[i]); NumPy broadcasting hides the mismatch.
        # Build generation 0 and seed the initial global best.
        temp = -1
        # NOTE(review): fitness returns an R2 score, which can be below -1 for
        # a very poor model; in that case g_best would keep its all-zero value.
        for i in range(self.pop_size):
            for j in range(self.var_num):
                self.pop_x[i][j] = random.uniform(self.bound[0][j], self.bound[1][j])
                self.pop_v[i][j] = random.uniform(0, 1)
            self.p_best[i] = self.pop_x[i]  # store the particle's own best so far
            fit = self.fitness(self.p_best[i])
            if fit > temp:
                self.g_best = self.p_best[i]
                temp = fit

    def fitness(self, ind_var):
        # NOTE(review): X and y are assigned but never used below; the module
        # globals feature_train/target_train are used directly instead.
        X = feature_train
        y = target_train
        """
        Evaluate one individual: train an ELM whose hidden-layer size is the
        rounded first decision variable, return the test-set R2.
        """
        x1 = ind_var[0]
        print(int(round(x1)))
        if x1==0:x1=1  # guard: the hidden layer needs at least one neuron
        if x1==2000:
            # Hard-coded memo for the upper bound, skipping an expensive retrain.
            print("R2 = 0.9614169669516667") # R2
            return 0.9614169669516667
        else:
            a = HiddenLayer(feature_train,int(round(x1)))
            a.regressor_train(target_train)
            predictval = a.regressor_test(feature_test)
            print("R2 = ",metrics.r2_score(target_test,predictval)) # R2
            return metrics.r2_score(target_test,predictval)

    def update_operator(self, pop_size):
        """
        Update operator: compute next-step velocity and position of each particle.
        """
        c1 = 2   # cognitive learning factor, typically 2
        c2 = 2   # social learning factor
        w = 0.4  # inertia weight
        for i in range(pop_size):
            # Velocity update (inertia + cognitive pull + social pull).
            self.pop_v[i] = w * self.pop_v[i] + c1 * random.uniform(0, 1) * (
                self.p_best[i] - self.pop_x[i]) + c2 * random.uniform(0, 1) * (self.g_best - self.pop_x[i])
            # Position update.
            self.pop_x[i] = self.pop_x[i] + self.pop_v[i]
            # Clamp back into the allowed bounds.
            for j in range(self.var_num):
                if self.pop_x[i][j] < self.bound[0][j]:
                    self.pop_x[i][j] = self.bound[0][j]
                if self.pop_x[i][j] > self.bound[1][j]:
                    self.pop_x[i][j] = self.bound[1][j]
            # Update p_best and g_best.
            # NOTE(review): every comparison re-runs fitness (a full ELM
            # train); caching fitness values would cut runtime sharply.
            if self.fitness(self.pop_x[i]) > self.fitness(self.p_best[i]):
                self.p_best[i] = self.pop_x[i]
            if self.fitness(self.pop_x[i]) > self.fitness(self.g_best):
                self.g_best = self.pop_x[i]

    def main(self):
        """Run the optimisation loop, then plot best fitness per generation."""
        popobj = []
        # Best position found over the whole run (starts as zeros).
        self.ng_best = np.zeros((1, self.var_num))[0]
        for gen in range(self.NGEN):
            self.update_operator(self.pop_size)
            popobj.append(self.fitness(self.g_best))
            print('############ Generation {} ############'.format(str(gen + 1)))
            if self.fitness(self.g_best) > self.fitness(self.ng_best):
                self.ng_best = self.g_best.copy()
            print('最好的位置:{}'.format(self.ng_best))
            print('最大的函数值:{}'.format(self.fitness(self.ng_best)))
        print("---- End of (successful) Searching ----")
        # Fitness-per-generation curve.
        plt.figure()
        fig = plt.gcf()
        fig.set_size_inches(18.5, 10.5)
        plt.title("Figure1")
        plt.xlabel("iterators", size=14)
        plt.ylabel("fitness", size=14)
        t = [t for t in range(self.NGEN)]
        plt.plot(t, popobj, color='b', linewidth=2)
        plt.show()
if __name__ == '__main__':
    # Search space: one decision variable (the hidden-neuron count) in [0, 2000].
    lower_bounds = [0]
    upper_bounds = [2000]
    # [generations, swarm size, lower bounds, upper bounds]
    pso = PSO([10, 10, lower_bounds, upper_bounds])
    pso.main()
7.代入参数
# ---- 7. Retrain with the PSO-selected hidden-layer size (1933 neurons) ----
elm = HiddenLayer(feature_train, 1933)
elm.regressor_train(target_train)
result = elm.regressor_test(feature_test)

# Predictions vs. ground truth on the held-out tail.
plt.plot(label_time, result)
plt.plot(label_time, target_test)
plt.legend(['ELM', 'TRUE'])
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.title("ELM")
plt.xlabel('Time')
plt.ylabel('Value')
plt.show()

# Error metrics on the test tail.
print("MSE:", mean_squared_error(target_test, result))
print("R2 = ", metrics.r2_score(target_test, result))
MSE: 6.8359109962212745
R2 =  0.9401018306505065