本文中布谷鸟算法CS、遗传算法GA、粒子群PSO已经都封装,可以直接在其他模型中进行使用
粒子群优化算法PSO
粒子群优化算法(Particle Swarm Optimization)是一种基于群体智能的优化算法,模拟了鸟群或鱼群等生物集体协同搜索的行为。
优点
- 算法简单易于理解和实现,不需要求解问题的梯度信息。
- 具有全局搜索能力,能够在复杂的搜索空间中找到全局最优解。
- 算法具有并行性,适合于分布式计算和多核处理器。
- 针对非线性、非凸、多峰和高维优化问题具有较好的适应性。
缺点
- 算法对初始粒子群的选择较为敏感,不同的初始群体可能导致不同的优化结果。
- 算法容易陷入局部最优解,对于复杂的非凸问题,可能无法找到全局最优解。
- 由于算法的随机性,其收敛性和稳定性相对较差。
- 对于高维问题,算法的收敛速度较慢,搜索性能有所下降。
封装
#PSO封装
import csv
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import explained_variance_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import explained_variance_score
from sklearn import metrics
from sklearn.metrics import mean_absolute_error # 平方绝对误差
import random
import pandas as pd
class PSO:
    """
    Particle swarm optimization (PSO) that tunes SVR hyper-parameters
    (C, epsilon, gamma) by maximizing the R^2 score on a held-out test split.

    Fixes over the original version:
    - ``g_best`` is stored as a copy; previously it aliased a row of
      ``p_best``/``pop_x`` that was mutated in place on later updates,
      silently corrupting the recorded global best.
    - fitness values are cached (``p_best_fit``/``g_best_fit``), so each
      particle is evaluated once per generation instead of up to four times
      (each evaluation re-reads the Excel files and re-trains an SVR).
    """

    def __init__(self, parameters):
        """
        particle swarm optimization
        parameter: a list type, like [NGEN, pop_size, var_num_min, var_num_max]
        """
        self.NGEN = parameters[0]          # number of generations
        self.pop_size = parameters[1]      # swarm size
        self.var_num = len(parameters[2])  # number of decision variables
        self.bound = [parameters[2], parameters[3]]  # [lower bounds, upper bounds]
        self.pop_x = np.zeros((self.pop_size, self.var_num))   # particle positions
        self.pop_v = np.zeros((self.pop_size, self.var_num))   # particle velocities
        self.p_best = np.zeros((self.pop_size, self.var_num))  # per-particle best positions
        self.p_best_fit = np.full(self.pop_size, -np.inf)      # cached fitness of each p_best
        self.g_best = np.zeros(self.var_num)                   # global best position
        self.g_best_fit = -np.inf                              # cached fitness of g_best
        # randomly initialize generation 0 and pick the initial global best
        for i in range(self.pop_size):
            for j in range(self.var_num):
                self.pop_x[i][j] = random.uniform(self.bound[0][j], self.bound[1][j])
                self.pop_v[i][j] = random.uniform(0, 1)
            self.p_best[i] = self.pop_x[i]  # slice assignment copies the values
            self.p_best_fit[i] = self.fitness(self.p_best[i])
            if self.p_best_fit[i] > self.g_best_fit:
                # .copy() is essential: g_best must not alias a row that keeps moving
                self.g_best = self.p_best[i].copy()
                self.g_best_fit = self.p_best_fit[i]

    def fitness(self, ind_var):
        """
        Individual fitness: train an SVR with (C, epsilon, gamma) = ind_var and
        return its R^2 score on the held-out test set.

        NOTE(review): the data files are re-read and re-scaled on every call;
        loading them once (e.g. in __init__) would speed the search up greatly.
        """
        data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # feature matrix
        target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # target values
        x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
        std_x = StandardScaler()
        x_train = std_x.fit_transform(x_train)
        x_test = std_x.transform(x_test)
        std_y = StandardScaler()
        y_train = std_y.fit_transform(y_train)
        y_test = std_y.transform(y_test)
        y_test = std_y.inverse_transform(y_test)  # metrics are computed on the original scale
        x1 = ind_var[0]
        x2 = ind_var[1]
        x3 = ind_var[2]
        # SVR needs strictly positive C/epsilon/gamma; nudge zeros off the boundary
        if x1 == 0: x1 = 0.001
        if x2 == 0: x2 = 0.001
        if x3 == 0: x3 = 0.001
        model_svr = SVR(C=x1, epsilon=x2, gamma=x3)
        model_svr.fit(x_train, y_train)
        predict_results = std_y.inverse_transform(model_svr.predict(x_test))  # predictions, original scale
        score = metrics.r2_score(y_test, predict_results)
        print("R2 = ", score)
        return score

    def update_operator(self, pop_size):
        """
        Advance every particle one step: standard PSO velocity/position update
        with bound clamping, then refresh the cached personal/global bests.
        """
        c1 = 2   # cognitive learning factor (usually 2)
        c2 = 2   # social learning factor
        w = 0.4  # inertia weight
        for i in range(pop_size):
            # velocity update
            self.pop_v[i] = (w * self.pop_v[i]
                             + c1 * random.uniform(0, 1) * (self.p_best[i] - self.pop_x[i])
                             + c2 * random.uniform(0, 1) * (self.g_best - self.pop_x[i]))
            # position update
            self.pop_x[i] = self.pop_x[i] + self.pop_v[i]
            # clamp back into the search box
            for j in range(self.var_num):
                if self.pop_x[i][j] < self.bound[0][j]:
                    self.pop_x[i][j] = self.bound[0][j]
                elif self.pop_x[i][j] > self.bound[1][j]:
                    self.pop_x[i][j] = self.bound[1][j]
            # evaluate ONCE per particle; compare against cached best fitnesses
            fit = self.fitness(self.pop_x[i])
            if fit > self.p_best_fit[i]:
                self.p_best[i] = self.pop_x[i]
                self.p_best_fit[i] = fit
            if fit > self.g_best_fit:
                self.g_best = self.pop_x[i].copy()  # copy: pop_x[i] keeps moving
                self.g_best_fit = fit

    def main(self):
        """
        Run the optimization, track the best-ever solution (ng_best) and plot
        the per-generation best R^2.
        """
        popobj = []
        self.ng_best = self.g_best.copy()
        ng_best_fit = self.g_best_fit
        for gen in range(self.NGEN):
            self.update_operator(self.pop_size)
            popobj.append(self.g_best_fit)
            print('############ Generation {} ############'.format(str(gen + 1)))
            if self.g_best_fit > ng_best_fit:
                self.ng_best = self.g_best.copy()
                ng_best_fit = self.g_best_fit
            print('最好的位置:{}'.format(self.ng_best))
            print('最大的函数值:{}'.format(ng_best_fit))
        print("---- End of (successful) Searching ----")
        # convergence curve: best R^2 per generation
        plt.figure()
        plt.title("PSO-SVM")
        plt.xlabel("GENS", size=14)
        plt.ylabel("R2", size=14)
        t = [t for t in range(self.NGEN)]
        plt.plot(t, popobj, 'b', linewidth=2)
        plt.show()
if __name__ == '__main__':
    NGEN = 200    # number of generations
    popsize = 20  # swarm size
    # per-variable bounds for (C, epsilon, gamma); epsilon is pinned to 0.01
    low = [0,0.01,0]
    up = [200,0.01,50]
    parameters = [NGEN, popsize, low, up]
    pso = PSO(parameters)
    pso.main()
遗传算法GA
遗传算法(Genetic Algorithm)是一种基于生物进化观念的优化算法,它模拟自然界的进化过程,通过种群的遗传、变异和选择来搜索优化问题的解。
优点
- 可以搜索复杂的问题空间:遗传算法能够应用于各种类型的问题,包括连续、离散、整数和混合型问题。它可以搜索复杂的问题空间,找到全局最优解或接近最优解的解。
- 并行化能力强:遗传算法的并行计算能力强,可以利用多个处理器或计算机同时进行计算,加快搜索过程。
- 不依赖问题的具体形式:遗传算法不需要知道问题的具体形式,只需要知道解的表示形式和适应度函数,因此可以适用于各种问题。
- 可以处理多目标问题:遗传算法可以处理多目标优化问题,通过引入多个适应度函数和多种选择策略,使种群中存在多个非劣解。
缺点
- 运算速度较慢:由于遗传算法需要进行大量的计算和评估,所以在处理大规模优化问题时,其运算速度相对较慢。
- 参数选择困难:遗传算法需要选择合适的参数,包括交叉率、变异率等,这需要经验和实验来确定,而且参数的选择对算法的性能有很大影响。
- 需要合适的编码方式:遗传算法需要将问题的解进行编码,而且编码方式对算法的性能影响较大。选择不合适的编码方式可能导致算法无法收敛或收敛速度很慢。
- 可能陷入局部最优解:遗传算法是一种启发式搜索算法,它可能陷入局部最优解而无法找到全局最优解,特别是在参数选择不当的情况下。
封装
import csv
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import explained_variance_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import explained_variance_score
from sklearn import metrics
from sklearn.metrics import mean_absolute_error # 平方绝对误差
import random
import pandas as pd
# 设置适应度,这里设置为R2
def msefunc(predictval, realval):
    """Fitness of an individual: the R^2 of the predictions (higher is better).

    NOTE: despite the name, this returns R^2, not a mean squared error.
    The name is kept for backward compatibility with existing callers.
    """
    score = metrics.r2_score(realval, predictval)
    print("R2 = ", score)
    return score
# 设置优化函数,这里为SVR,参数在此绑定,使用验证集输入验证得出适应度
def SVMResult(vardim, x, bound):
    """Objective for the GA: train an SVR with hyper-parameters x = (C, epsilon,
    gamma) and return its test-set R^2 via msefunc.

    vardim and bound are accepted for interface compatibility but unused here.
    NOTE(review): y_train stays 2-D after StandardScaler; sklearn's SVR expects
    a 1-D target and may warn/ravel it — confirm against the sklearn version used.
    """
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # feature matrix
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # target values
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)  # evaluate on the original scale
    c = x[0]
    e = x[1]
    g = x[2]
    model_svr = SVR(C=c, epsilon=e, gamma=g)
    model_svr.fit(x_train, y_train)
    predict_results = std_y.inverse_transform(model_svr.predict(x_test))  # predictions, original scale
    return msefunc(predict_results, y_test)
class GAIndividual:
    '''
    individual of genetic algorithm: a real-valued chromosome plus its fitness
    '''

    def __init__(self, vardim, bound):
        '''
        vardim: dimension of variables
        bound: 2 x vardim array of variable boundaries (row 0 lower, row 1 upper)
        '''
        self.vardim = vardim
        self.bound = bound
        self.fitness = 0.  # filled in by calculateFitness()

    def generate(self):
        '''
        generate a random chromosome, uniform within the bounds
        '''
        dim = self.vardim  # renamed from `len`, which shadowed the builtin
        rnd = np.random.random(size=dim)
        self.chrom = np.zeros(dim)
        for i in range(0, dim):
            self.chrom[i] = self.bound[0, i] + \
                (self.bound[1, i] - self.bound[0, i]) * rnd[i]

    def calculateFitness(self):
        '''
        evaluate the chromosome: fitness is the SVR test-set R^2 from SVMResult
        '''
        self.fitness = SVMResult(self.vardim, self.chrom, self.bound)
import random
import copy
class GeneticAlgorithm:
    '''
    The class for genetic algorithm: real-coded GA with roulette-wheel
    selection, arithmetic crossover and non-uniform-style mutation, used here
    to maximize the SVR test R^2 returned by GAIndividual.calculateFitness.
    '''

    def __init__(self, sizepop, vardim, bound, MAXGEN, params):
        '''
        sizepop: population size
        vardim: dimension of variables
        bound: boundaries of variables (2 x vardim: row 0 lower, row 1 upper)
        MAXGEN: termination condition (number of generations)
        params: algorithm required parameters, a list consisting of
                [crossover rate, mutation rate, alpha]
        '''
        self.sizepop = sizepop
        self.MAXGEN = MAXGEN
        self.vardim = vardim
        self.bound = bound
        self.population = []
        self.fitness = np.zeros((self.sizepop, 1))  # fitness of each individual
        # per-generation record: [best-so-far, population average, generation max]
        self.trace = np.zeros((self.MAXGEN, 3))
        self.params = params

    def initialize(self):
        '''
        initialize the population with random individuals
        '''
        for i in range(0, self.sizepop):
            ind = GAIndividual(self.vardim, self.bound)
            ind.generate()
            self.population.append(ind)

    def evaluate(self):
        '''
        evaluation of the population fitnesses (trains one SVR per individual)
        '''
        for i in range(0, self.sizepop):
            self.population[i].calculateFitness()
            self.fitness[i] = self.population[i].fitness

    def solve(self):
        '''
        evolution process of genetic algorithm: evaluate, then loop
        selection -> crossover -> mutation -> evaluation until MAXGEN,
        tracking the best individual ever seen in self.best.
        '''
        self.t = 0
        self.initialize()
        self.evaluate()
        best = np.max(self.fitness)
        bestIndex = np.argmax(self.fitness)
        # deepcopy: the population objects are reused/mutated by the operators
        self.best = copy.deepcopy(self.population[bestIndex])
        self.avefitness = np.mean(self.fitness)
        self.maxfitness = np.max(self.fitness)
        self.trace[self.t, 0] = self.best.fitness
        self.trace[self.t, 1] = self.avefitness
        self.trace[self.t, 2] = self.maxfitness
        print("Generation %d: optimal function value is: %f; average function value is %f;max function value is %f" % (
            self.t, self.trace[self.t, 0], self.trace[self.t, 1], self.trace[self.t, 2]))
        while (self.t < self.MAXGEN - 1):
            self.t += 1
            self.selectionOperation()
            self.crossoverOperation()
            self.mutationOperation()
            self.evaluate()
            best = np.max(self.fitness)
            bestIndex = np.argmax(self.fitness)
            # keep the best-ever individual (elitist record, not elitist survival)
            if best > self.best.fitness:
                self.best = copy.deepcopy(self.population[bestIndex])
            self.avefitness = np.mean(self.fitness)
            self.maxfitness = np.max(self.fitness)
            self.trace[self.t, 0] = self.best.fitness
            self.trace[self.t, 1] = self.avefitness
            self.trace[self.t, 2] = self.maxfitness
            print(
                "Generation %d: optimal function value is: %f; average function value is %f;max function value is %f" % (
                    self.t, self.trace[self.t, 0], self.trace[self.t, 1], self.trace[self.t, 2]))
        print("Optimal function value is: %f; " %
              self.trace[self.t, 0])
        print("Optimal solution is:")
        print(self.best.chrom)
        self.printResult()

    def selectionOperation(self):
        '''
        selection operation for Genetic Algorithm: roulette-wheel selection
        on the cumulative normalized fitness.
        NOTE(review): this assumes all fitness values are positive; R^2 can be
        negative for bad models, which would break the cumulative distribution
        — confirm the fitness range in practice.
        '''
        newpop = []
        totalFitness = np.sum(self.fitness)
        accuFitness = np.zeros((self.sizepop, 1))
        sum1 = 0.
        # build the cumulative (normalized) fitness distribution
        for i in range(0, self.sizepop):
            accuFitness[i] = sum1 + self.fitness[i] / totalFitness
            sum1 = accuFitness[i]
        # spin the wheel once per slot in the new population
        for i in range(0, self.sizepop):
            r = random.random()
            idx = 0
            for j in range(0, self.sizepop - 1):
                if j == 0 and r < accuFitness[j]:
                    idx = 0
                    break
                elif r >= accuFitness[j] and r < accuFitness[j + 1]:
                    idx = j + 1
                    break
            newpop.append(self.population[idx])
        self.population = newpop

    def crossoverOperation(self):
        '''
        crossover operation for genetic algorithm: arithmetic crossover of two
        randomly chosen parents from the crossover point onwards, applied with
        probability params[0] and blend factor alpha = params[2].
        NOTE(review): the second child is computed from the ALREADY-UPDATED
        first child's genes, so the two children are not the symmetric blend
        the formula suggests — confirm whether this is intended.
        '''
        newpop = []
        for i in range(0, self.sizepop, 2):
            idx1 = random.randint(0, self.sizepop - 1)
            idx2 = random.randint(0, self.sizepop - 1)
            while idx2 == idx1:
                idx2 = random.randint(0, self.sizepop - 1)
            newpop.append(copy.deepcopy(self.population[idx1]))
            newpop.append(copy.deepcopy(self.population[idx2]))
            r = random.random()
            if r < self.params[0]:
                crossPos = random.randint(1, self.vardim - 1)
                for j in range(crossPos, self.vardim):
                    newpop[i].chrom[j] = newpop[i].chrom[
                        j] * self.params[2] + (1 - self.params[2]) * newpop[i + 1].chrom[j]
                    newpop[i + 1].chrom[j] = newpop[i + 1].chrom[j] * self.params[2] + \
                        (1 - self.params[2]) * newpop[i].chrom[j]
        self.population = newpop

    def mutationOperation(self):
        '''
        mutation operation for genetic algorithm: with probability params[1],
        move one random gene toward its lower or upper bound (coin flip), with
        a displacement that shrinks as t approaches MAXGEN (non-uniform-style
        mutation).
        '''
        newpop = []
        for i in range(0, self.sizepop):
            newpop.append(copy.deepcopy(self.population[i]))
            r = random.random()
            if r < self.params[1]:
                mutatePos = random.randint(0, self.vardim - 1)
                theta = random.random()
                if theta > 0.5:
                    # shift toward the lower bound
                    newpop[i].chrom[mutatePos] = newpop[i].chrom[
                        mutatePos] - (
                        newpop[i].chrom[mutatePos] - self.bound[0, mutatePos]) * (
                        1 - random.random() ** (1 - self.t / self.MAXGEN))
                else:
                    # shift toward the upper bound
                    newpop[i].chrom[mutatePos] = newpop[i].chrom[
                        mutatePos] + (
                        self.bound[1, mutatePos] - newpop[i].chrom[mutatePos]) * (
                        1 - random.random() ** (1 - self.t / self.MAXGEN))
        self.population = newpop

    def printResult(self):
        '''
        plot the result of the genetic algorithm: the per-generation maximum
        fitness (column 2 of the trace) over the generations.
        '''
        x = np.arange(0, self.MAXGEN)
        y1 = self.trace[:, 0]  # best-so-far
        y2 = self.trace[:, 1]  # population average
        y3 = self.trace[:, 2]  # generation max (the curve actually plotted)
        # plt.plot(x, y1, 'r', label='optimal value')
        # plt.plot(x, y2, 'g', label='average value')
        # plt.plot(x, y3, 'b', label='max value')
        # plt.plot(x, y3, 'b',linewidth=2)
        # fig = plt.gcf()
        # fig.set_size_inches(18.5, 10.5)
        # plt.xlabel("GENS", size=14)
        # plt.ylabel("R2", size=14)
        # plt.title("GA-SVM")
        # # plt.legend()
        # plt.show()
        plt.figure()
        plt.title("GA-SVM")
        plt.xlabel("GENS", size=14)
        plt.ylabel("R2", size=14)
        plt.plot(x, y3, 'b',linewidth=2)
        plt.show()
# Generation 19: optimal function value is: 0.796173; average function value is 0.655682;max function value is 0.710410
# Optimal function value is: 0.796173;
# Optimal solution is:
# [ 6.73356933 0.29407187 48.58751446]
if __name__ == "__main__":
    # search bounds for (C, epsilon, gamma): row 0 lower, row 1 upper
    bound = np.array([[0,0.01,0],[200,0.01,50]])
    # 20 individuals, 3 variables, 3 generations;
    # params = [crossover rate, mutation rate, alpha]
    ga = GeneticAlgorithm(20, 3, bound, 3, [0.7, 0.0175, 0.5])
    ga.solve()
布谷鸟算法CS
布谷鸟算法(Cuckoo Search Algorithm)是一种启发式算法,灵感来自布谷鸟筑巢的行为。该算法主要用于求解优化问题,包括连续优化问题和离散优化问题。
优点
- 具有全局搜索能力:布谷鸟算法通过随机生成的鸟巢来搜索问题的解空间,能够在整个搜索空间中找到全局最优解。
- 简单易实现:布谷鸟算法相对于其他优化算法而言,其实现相对简单。算法的核心思想是模拟布谷鸟的筑巢行为,只需要定义好适应度函数和更新策略即可。
- 搜索效率高:由于布谷鸟算法采用随机生成鸟巢的方式进行搜索,可以在较短的时间内找到问题的较优解,尤其适用于高维复杂问题的求解。
缺点
- 收敛速度慢:布谷鸟算法在搜索过程中容易陷入局部最优解,而且收敛速度相对较慢。这是因为算法只是通过随机生成鸟巢进行搜索,没有引入其他启发式信息进行指导。
- 参数选择较为困难:布谷鸟算法中存在一些重要的参数,例如种群大小、巢蛋数量等,这些参数的选择对算法的性能有重要影响。但是,如何选择合适的参数仍然是一个挑战。
- 对问题的建模要求高:布谷鸟算法需要将优化问题进行数学建模,并定义适应度函数。对于某些复杂的问题,建模和定义适应度函数可能会比较困难。
封装
import numpy as np
import scipy.special as sc_special
import matplotlib.pyplot as plt #matplotlib的pyplot模块一般是最常用的,可以方便用户快速绘制二位图表
from matplotlib import cm #matplotlib是python最著名的绘图库
from mpl_toolkits.mplot3d import Axes3D #3D绘图
"""
Cuckoo search function
---------------------------------------------------
Input parameters:
n: 巢的数量
m: 维数
fit_func: 适用度函数
lower_boundary: 下边界
upper_boundary: 上边界
iter_num: 迭代次数 (默认: 100)
pa: 被宿主发现蛋的概率 (default: 0.25)
beta:与问题规模相关的步长比例因子 (note: 1 < beta < 2) (default: 1.5)
step_size: 与问题规模相关的步长比例因子 (default: 0.1)
Output:
最佳解决方案及值
"""
#绘制图像
def plot_3d(ax):
    """Render the benchmark surface on the given 3D axes, pause 3 s, then show.

    The surface is the same two-variable "peaks"-style function used as the
    cuckoo-search objective (see fit_func).
    """
    grid = np.arange(-3, 3, 0.1)  # evenly spaced samples over [-3, 3)
    x, y = np.meshgrid(grid, grid)
    z = 3*(1-x)**2*np.e**(-x**2-(y+1)**2) - 10*(x/5-x**3-y**5)*np.e**(-x**2-y**2) - (np.e**(-(x+1)**2-y**2))/3
    # rstride/cstride: row/column sampling strides; coolwarm colour map
    ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap=cm.coolwarm)
    ax.set_zlim(-10, 10)  # clamp the vertical extent of the plot
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.pause(3)  # keep the frame up briefly before continuing
    plt.show()
def fit_func(nest):
    """Benchmark objective to maximize; `nest` is an (x, y) pair."""
    x, y = nest
    peak = 3 * (1 - x)**2 * np.e**(-x**2 - (y + 1)**2)
    middle = 10 * (x / 5 - x**3 - y**5) * np.e**(-x**2 - y**2)
    tail = (np.e**(-(x + 1)**2 - y**2)) / 3
    return peak - middle - tail
#计算适应度
def calc_fitness(fit_func, nests):
    """Evaluate fit_func on every nest (each row of `nests`); return a 1-D array."""
    return np.array([fit_func(nest) for nest in nests], dtype=float)
#主要过程
def cuckoo_search(n, m, fit_func, lower_boundary, upper_boundary, iter_num = 100,pa = 0.25, beta = 1.5, step_size = 0.1):
    """Cuckoo-search maximization of fit_func.

    Parameters:
        n: number of nests
        m: dimensionality of each nest
        fit_func: objective to maximize
        lower_boundary / upper_boundary: search box
        iter_num: number of iterations (default 100)
        pa: probability that a host discovers the egg (default 0.25)
        beta: Levy exponent, 1 < beta < 2 (NOTE(review): currently unused —
            update_nests hard-codes 1.5; confirm before relying on it)
        step_size: step scale factor (default 0.1)

    Returns:
        (best_nest, best_fitness)

    Side effect: animates the population on the module-level 3D axes ``ax``
    (assumes m == 2, since only the first two coordinates are plotted).
    """
    # initial egg/nest positions and their fitness
    nests = generate_nests(n, m, lower_boundary, upper_boundary)
    fitness = calc_fitness(fit_func, nests)
    # best nest so far
    best_nest_index = np.argmax(fitness)
    best_fitness = fitness[best_nest_index]
    best_nest = nests[best_nest_index].copy()

    sca = None  # previous scatter artist, removed before each redraw
    for _ in range(iter_num):
        nests = update_nests(fit_func, lower_boundary, upper_boundary, nests, best_nest, fitness, step_size)
        nests = abandon_nests(nests, lower_boundary, upper_boundary, pa)
        fitness = calc_fitness(fit_func, nests)
        # ---- plot every nest. The original filled the arrays with a
        # hard-coded range(20) loop, so for n != 20 it plotted uninitialized
        # np.empty garbage; slicing covers all n nests. ----
        x = nests[:, 0:1]
        y = nests[:, 1:2]
        z = 3*(1-x)**2*np.e**(-x**2-(y+1)**2) - 10*(x/5-x**3-y**5)*np.e**(-x**2-y**2) - (np.e**(-(x+1)**2-y**2))/3
        if sca is not None:
            sca.remove()
        sca = ax.scatter(x, y, z, c='black', marker='o')
        plt.show()
        plt.pause(0.1)
        # greedy update of the best nest
        max_nest_index = np.argmax(fitness)
        max_fitness = fitness[max_nest_index]
        max_nest = nests[max_nest_index]
        if (max_fitness > best_fitness):
            best_nest = max_nest.copy()
            best_fitness = max_fitness
    return (best_nest, best_fitness)
#生成巢穴位置
def generate_nests(n, m, lower_boundary, upper_boundary):
    """Draw n nests uniformly at random inside the box defined by the bounds.

    Returns an (n, m) array; row i is one nest's position.
    """
    lower_boundary = np.array(lower_boundary)
    upper_boundary = np.array(upper_boundary)
    # one vectorized draw instead of the original per-element Python loop
    nests = lower_boundary + np.random.rand(n, m) * (upper_boundary - lower_boundary)
    return nests
#获取新的巢穴位置并用好的替换掉旧的不好的
def update_nests(fit_func, lower_boundary, upper_boundary, nests, best_nest, fitness, step_coefficient):
    """Propose Levy-flight moves for every nest and keep only improving moves.

    Mutates `nests` in place (greedy selection against `fitness`) and returns it.
    """
    lower_boundary = np.array(lower_boundary)
    upper_boundary = np.array(upper_boundary)
    n, m = nests.shape
    # Levy-flight step lengths (exponent fixed at 1.5)
    steps = levy_flight(n, m, 1.5)
    new_nests = nests.copy()
    for k in range(n):
        # scaling by (nests[k] - best_nest) leaves the best nest in place, and
        # a small step coefficient keeps the flights from becoming too aggressive
        move = step_coefficient * steps[k] * (nests[k] - best_nest)
        direction = np.random.rand(m)
        new_nests[k] += move * direction
        # clamp the proposal back inside the search box
        new_nests[k] = np.clip(new_nests[k], lower_boundary, upper_boundary)
    new_fitness = calc_fitness(fit_func, new_nests)
    improved = new_fitness > fitness
    nests[improved] = new_nests[improved]
    return nests
#卵被丢弃寻找新巢
def abandon_nests(nests, lower_boundary, upper_boundary, pa):
    """Abandon discovered eggs: with probability `pa`, kick each nest by a
    random fraction of the difference between two randomly chosen nests.

    Mutates `nests` in place and returns it.
    """
    lower_boundary = np.array(lower_boundary)
    upper_boundary = np.array(upper_boundary)
    n, m = nests.shape
    for k in range(n):
        if np.random.rand() < pa:
            # random fraction of the difference between two random nests
            kick = np.random.rand() * (nests[np.random.randint(0, n)] - nests[np.random.randint(0, n)])
            nests[k] += kick
            # clamp the perturbed nest back inside the search box
            nests[k] = np.clip(nests[k], lower_boundary, upper_boundary)
    return nests
#计算莱维飞行
def levy_flight(n, m, beta):
    """Draw an (n, m) matrix of Levy-flight step lengths via Mantegna's
    algorithm: steps = u / |v|^(1/beta) with u ~ N(0, sigma_u), v ~ N(0, 1)."""
    numerator = sc_special.gamma(1 + beta) * np.sin(np.pi * beta / 2)
    denominator = sc_special.gamma((1 + beta) / 2) * beta * (2 ** ((beta - 1) / 2))
    sigma_u = (numerator / denominator) ** (1 / beta)
    u = np.random.normal(0, sigma_u, (n, m))
    v = np.random.normal(0, 1, (n, m))
    return u / (np.abs(v)) ** (1 / beta)
#主函数
if __name__=='__main__':
    fig = plt.figure()  # background figure for the 3D surface
    ax = Axes3D(fig)
    # interactive mode: plt.show does not block, so the animation keeps running
    plt.ion()
    plot_3d(ax)
    # 25 nests, 2 dimensions, search box [-3, 3] x [-3, 3]
    best_nest, best_fitness = cuckoo_search(25, 2, fit_func, [-3, -3], [3, 3], step_size = 0.4)
    print('最大值为:%.5f, 在(%.5f, %.5f)处取到!'%(best_fitness, best_nest[0], best_nest[1]))
    plt.ioff()  # leave interactive mode so the final plot blocks
    plot_3d(ax)
支持向量机回归算法SVR
支持向量机回归算法(Support Vector Regression)是一种基于支持向量机的回归方法。与传统的回归算法不同,SVR 不仅仅寻求一个最优线性或非线性回归方程,而是寻求一个最优的边界(超平面),使得所有数据点到该超平面的距离都小于一个预先定义的容忍度。
优点
- 非线性关系处理能力强:可以通过使用不同的核函数,将输入空间映射到高维特征空间,从而解决非线性回归问题。
- 对异常值不敏感:由于回归目标是最大化间隔,因此对于异常值有一定的容忍度,不会完全受其影响。
- 支持多维特征:可以处理多维特征的回归问题,并且不会出现过拟合问题。
缺点
- 对参数的依赖性:需要选择合适的核函数和相关参数,如果选择不当,可能会导致模型的性能下降。
- 计算复杂度较高:回归算法的计算复杂度与训练样本数目相关,在大样本数据集上训练时间较长。
- 对缺失数据敏感:算法对缺失数据敏感,如果数据中存在缺失值,需要进行额外的数据处理。
- 可解释性较差:回归算法是一种黑盒模型,不容易解释模型的预测结果。
封装
import csv
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import explained_variance_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import explained_variance_score
from sklearn import metrics
from sklearn.metrics import mean_absolute_error # 平方绝对误差
import random
import pandas as pd
def linear2():
    """SVR regression with optimizer-tuned hyper-parameters.

    Loads features/targets from Excel, standardizes both, trains an SVR, prints
    R^2 / MAPE / RMSE on the held-out test set (on the original scale), and
    plots predicted vs. actual values.
    """
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # feature matrix
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # target values
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)  # metrics are computed on the original scale
    # hyper-parameters (C, epsilon, gamma) found by the different optimizers:
    # model_svr = SVR(C=1, epsilon=0.1, gamma=10)                       # hand-picked baseline
    # model_svr = SVR(C=148.990417, epsilon=0.01, gamma=0.616337768)    # GA
    # model_svr = SVR(C=16.81477247, epsilon=0.01, gamma=0.46380986)    # CS
    model_svr = SVR(C=17.0006218, epsilon=0.01, gamma=0.462968607)  # PSO
    model_svr.fit(x_train, y_train)
    predict_results = std_y.inverse_transform(model_svr.predict(x_test))  # predictions, original scale
    error1 = metrics.r2_score(y_test, predict_results)
    MAPE = metrics.mean_absolute_percentage_error(y_test, predict_results)
    RMSE = metrics.mean_squared_error(y_test, predict_results) ** 0.5
    print("R Squared误差为:\n", error1)
    print("MAPE误差为:\n", MAPE)
    print("RMSE误差为:\n", RMSE)
    # predicted-vs-actual scatter with the y = x reference line
    plt.scatter(y_test, predict_results)
    plt.plot([0,500],[0,500],'--',color='r')
    plt.show()


if __name__ == '__main__':
    linear2()
岭回归
岭回归是一种用于解决多重共线性问题的机器学习算法
优点
- 可以很好地处理多重共线性问题。当特征之间存在高度相关性时,岭回归可以降低特征系数的方差,提高模型的稳定性和泛化能力。
- 可以使用L2正则化来防止过拟合。通过对特征系数进行惩罚,岭回归可以避免模型对训练数据过度拟合,从而提高模型的泛化能力。
- 对于小样本数据集,岭回归可以提供较好的预测性能。由于岭回归可以通过缩小特征系数来降低模型的复杂度,使得在小样本情况下也能获得较好的模型性能。
缺点
- 岭回归无法选择最优的惩罚力度。在实际应用中,需要手动调节岭回归中的超参数,如正则化系数,来控制特征系数的缩小程度。这需要一定的经验和试错来选择最优的参数。
- 对于大型数据集,岭回归的计算复杂度较高。由于岭回归需要通过求解正规方程来估计特征系数,计算量较大。对于大规模数据集,可能需要使用更高效的算法来加快计算速度。
- 岭回归在特征选择方面的表现较弱。由于岭回归会对所有的特征进行惩罚,即使某些特征与目标变量没有关联,它们仍然会有较大的系数。因此,在特征选择的场景下,岭回归的表现可能不如其他相关算法。
封装
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,SGDRegressor,Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
from sklearn.metrics import r2_score#R square
import time
from sklearn import metrics
import numpy as np
def linear3():
    """Ridge regression baseline.

    Loads features/targets from Excel, standardizes both, fits a Ridge model
    and prints R^2 / MAPE / RMSE on the held-out test set (original scale).
    Returns None.
    """
    start = time.time()
    # load the data
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # feature matrix
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # target values
    # train/test split (same seed/ratio as the other baselines for comparability)
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    # print(x_train)
    # optional visualization of the raw data
    # plt.scatter(data[0],target)
    # plt.show()
    # standardize the features
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # standardize the target
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)  # metrics are computed on the original scale
    # estimator; alpha presumably comes from an earlier hyper-parameter search — confirm
    estimator = Ridge(alpha=0.99244312)
    estimator.fit(x_train, y_train)
    # fitted model inspection
    # print("岭回归权重系数为:\n", estimator.coef_)
    # print("岭回归偏值为:\n", estimator.intercept_)
    # optional visualization
    # plt.scatter(x_train,y_train)
    # plt.plot(x_train,y_train,color='r')
    # plt.show()
    # model evaluation on the original scale
    y_predict = std_y.inverse_transform(estimator.predict(x_test))
    end = time.time()
    # error = mean_squared_error(y_test,y_predict)
    # print("正规方程-均方误差为:\n",error)
    # R^2 measures goodness of fit
    error1 = r2_score(y_test, y_predict)
    print("R Squared误差为:\n", error1)
    # MAPE and RMSE measure prediction error
    MAPE = metrics.mean_absolute_percentage_error(y_test, y_predict)
    RMSE = metrics.mean_squared_error(y_test, y_predict) ** 0.5
    print("MAPE误差为:\n", MAPE)
    print("RMSE误差为:\n", RMSE)
    # plt.scatter(y_test, y_predict)
    # plt.plot([0, 500], [0, 500], '--', color='r')
    # plt.show()
    # print("程序process_1的运行时间为:{}".format(end - start))
    return None


if __name__ == '__main__':
    linear3()
正规方程
正规方程是解决线性最小二乘问题的一种方法。它通过求解导数为零的方程来找到使得误差最小的参数估计。
优点
- 理论基础:正规方程是通过求解导数为零的方程来得到最小二乘估计的闭式解,具有较强的理论支持。
- 精确解:正规方程可以得到最小二乘估计的精确解,无需迭代过程,计算速度相对较快。
- 可解释性:正规方程给出了解析解,可以直接得到对模型参数的解释和理解。
缺点
- 计算复杂度高:求解正规方程需要计算矩阵的逆,当矩阵规模较大时,计算复杂度较高,耗费时间和计算资源。
- 存在矩阵逆的问题:当矩阵不可逆或接近奇异时,求解正规方程可能会失败。
- 效果受数据噪声影响:正规方程对数据中的噪声敏感,当数据中存在噪声时,最小二乘估计可能会受到影响。
封装
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,SGDRegressor,Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
from sklearn.metrics import r2_score#R square
import time
from sklearn import metrics
import numpy as np
def linear1():
    """Ordinary least squares (normal equation) baseline.

    Loads features/targets from Excel, standardizes both, fits a
    LinearRegression model and prints R^2 / MAPE / RMSE on the held-out
    test set (original scale). Returns None.
    """
    start = time.time()
    # load the data
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # feature matrix
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # target values
    # train/test split (same seed/ratio as the other baselines for comparability)
    x_train,x_test,y_train,y_test = train_test_split(data,target,random_state=22,test_size=0.25)
    # print(x_train)
    # optional visualization of the raw data
    # plt.scatter(data[0],target)
    # plt.show()
    # standardize the features
    std_x =StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # standardize the target
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)  # metrics are computed on the original scale
    # estimator: closed-form least squares
    estimator = LinearRegression()
    estimator.fit(x_train,y_train)
    # fitted model inspection
    # print("正规方程权重系数为:\n",estimator.coef_)
    # print("正规方程偏值为:\n",estimator.intercept_)
    # optional visualization
    # plt.scatter(x_train,y_train)
    # plt.plot(x_train,y_train,color='r')
    # plt.show()
    # model evaluation on the original scale
    y_predict = std_y.inverse_transform(estimator.predict(x_test))
    end = time.time()
    # error = mean_squared_error(y_test,y_predict)
    # print("正规方程-均方误差为:\n",error)
    # R^2 measures goodness of fit
    error1 = r2_score(y_test, y_predict)
    print("R Squared误差为:\n", error1)
    # MAPE and RMSE measure prediction error
    MAPE = metrics.mean_absolute_percentage_error(y_test, y_predict)
    RMSE = metrics.mean_squared_error(y_test, y_predict) ** 0.5
    # plt.scatter(y_test, y_predict)
    # plt.plot([0,500],[0,500],'--',color='r')
    # plt.show()
    print("MAPE误差为:\n", MAPE)
    print("RMSE误差为:\n", RMSE)
    # print("程序process_1的运行时间为:{}".format(end - start))
    return None


if __name__ == '__main__':
    linear1()
随机梯度下降
随机梯度下降(Stochastic Gradient Descent, SGD)是一种常用的机器学习优化算法,用于训练模型的参数。
优点
- 速度快:相比于传统的梯度下降算法,SGD每次只使用一个样本进行参数更新,计算速度更快。
- 内存消耗小:由于每次只使用一个样本进行更新,所以内存消耗较小,适用于大规模数据集。
- 适用于在线学习:SGD适用于在线学习问题,可以边接收新样本边更新模型参数。
- 可用于非凸优化:SGD对于非凸优化问题也能有较好的效果。
缺点
- 不稳定性:由于SGD每次只使用一个样本进行参数更新,导致参数的更新过程非常不稳定,可能会出现震荡现象。
- 可能会收敛到局部最优解:由于参数的更新是根据随机选择的样本进行的,可能会导致收敛到局部最优解而不是全局最优解。
- 需要合适的学习率:SGD对学习率非常敏感,需要合适的学习率才能保证算法快速收敛。
- 需要进行参数调整:在应用SGD算法时,需要进行一些参数的调整,如学习率、正则化参数等。
封装
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,SGDRegressor,Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
from sklearn.metrics import r2_score#R square
import time
from sklearn import metrics
import numpy as np
def linear2():
    """Stochastic gradient descent (SGDRegressor) baseline.

    Loads features/targets from Excel, standardizes both, fits an
    SGDRegressor and prints R^2 / MAPE / RMSE on the held-out test set
    (original scale). Returns None.
    """
    start = time.time()
    # gradient descent
    # load the data
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # feature matrix
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # target values
    # train/test split (same seed/ratio as the other baselines for comparability)
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    # print(x_train)
    # optional visualization of the raw data
    # plt.scatter(data[0],target)
    # plt.show()
    # standardize the features
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # standardize the target
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)  # metrics are computed on the original scale
    # estimator; alpha presumably comes from an earlier hyper-parameter search — confirm
    estimator = SGDRegressor(alpha=0.055935631,max_iter=1000)
    estimator.fit(x_train, y_train)
    # fitted model inspection
    # print("梯度下降权重系数为:\n", estimator.coef_)
    # print("梯度下降偏值为:\n", estimator.intercept_)
    # optional visualization
    # plt.scatter(x_train,y_train)
    # plt.plot(x_train,y_train,color='r')
    # plt.show()
    # model evaluation on the original scale
    y_predict = std_y.inverse_transform(estimator.predict(x_test))
    end = time.time()
    # error = mean_squared_error(y_test,y_predict)
    # print("正规方程-均方误差为:\n",error)
    # R^2 measures goodness of fit
    error1 = r2_score(y_test, y_predict)
    print("R Squared误差为:\n", error1)
    # MAPE and RMSE measure prediction error
    MAPE = metrics.mean_absolute_percentage_error(y_test, y_predict)
    RMSE = metrics.mean_squared_error(y_test, y_predict) ** 0.5
    print("MAPE误差为:\n", MAPE)
    print("RMSE误差为:\n", RMSE)
    # plt.scatter(y_test, y_predict)
    # plt.plot([0, 500], [0, 500], '--', color='r')
    # plt.show()
    # print("程序process_1的运行时间为:{}".format(end - start))
    return None


if __name__ == '__main__':
    linear2()