[Python] Optimizing Support Vector Regression (SVR) with Cuckoo Search (CS), Genetic Algorithm (GA), and Particle Swarm Optimization (PSO), plus common linear regression baselines: ridge regression, the normal equation, and stochastic gradient descent


The CS, GA, and PSO implementations in this article are fully wrapped, so they can be dropped into other models directly.

Particle Swarm Optimization (PSO)

Particle Swarm Optimization (PSO) is a swarm-intelligence optimization algorithm that mimics the collective, cooperative search behavior of bird flocks or fish schools.
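Each particle carries a position $x_i$ and a velocity $v_i$; at every step it is pulled toward its personal best $p_i$ and the swarm's global best $g$. The update rule used by the implementation below, with inertia weight $w$, learning factors $c_1, c_2$, and $r_1, r_2 \sim U(0,1)$, is the standard one:

$$v_i \leftarrow w\,v_i + c_1 r_1 (p_i - x_i) + c_2 r_2 (g - x_i), \qquad x_i \leftarrow x_i + v_i$$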

Advantages

  1. The algorithm is simple to understand and implement, and requires no gradient information about the problem.
  2. It has global search capability and can locate the global optimum in complex search spaces.
  3. It is inherently parallel, making it well suited to distributed computing and multi-core processors.
  4. It adapts well to nonlinear, non-convex, multimodal, and high-dimensional optimization problems.

Disadvantages

  1. The algorithm is sensitive to the initial swarm; different initial populations can lead to different results.
  2. It can get trapped in local optima; for complex non-convex problems it may fail to find the global optimum.
  3. Because of its stochastic nature, its convergence and stability are comparatively weak.
  4. On high-dimensional problems, convergence slows down and search performance degrades.

Implementation

# PSO wrapper
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn import metrics



class PSO:
    def __init__(self, parameters):
        """
        particle swarm optimization
        parameters: a list like [NGEN, pop_size, var_num_min, var_num_max]
        """
        # initialization
        self.NGEN = parameters[0]          # number of generations
        self.pop_size = parameters[1]      # swarm size
        self.var_num = len(parameters[2])  # number of decision variables
        self.bound = []                    # variable bounds: [lower, upper]
        self.bound.append(parameters[2])
        self.bound.append(parameters[3])

        self.pop_x = np.zeros((self.pop_size, self.var_num))   # positions of all particles
        self.pop_v = np.zeros((self.pop_size, self.var_num))   # velocities of all particles
        self.p_best = np.zeros((self.pop_size, self.var_num))  # best position found by each particle
        self.g_best = np.zeros((1, self.var_num))               # best position found by the swarm

        # initialize generation 0 and the initial global best
        temp = -1
        for i in range(self.pop_size):
            for j in range(self.var_num):
                self.pop_x[i][j] = random.uniform(self.bound[0][j], self.bound[1][j])
                self.pop_v[i][j] = random.uniform(0, 1)
            self.p_best[i] = self.pop_x[i]  # store each particle's personal best
            fit = self.fitness(self.p_best[i])
            if fit > temp:
                self.g_best = self.p_best[i].copy()  # copy to avoid aliasing the p_best row
                temp = fit

    def fitness(self, ind_var):
        """
        Individual fitness: R2 of an SVR trained with the candidate hyperparameters.
        Note: the data is re-read from disk on every call, which is slow; loading it
        once outside this method would speed the search up considerably.
        """
        data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # features
        target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # targets
        x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)

        std_x = StandardScaler()
        x_train = std_x.fit_transform(x_train)
        x_test = std_x.transform(x_test)

        std_y = StandardScaler()
        y_train = std_y.fit_transform(y_train)
        y_test = std_y.transform(y_test)
        y_test = std_y.inverse_transform(y_test)

        x1 = ind_var[0]  # C
        x2 = ind_var[1]  # epsilon
        x3 = ind_var[2]  # gamma

        # guard against invalid zero-valued hyperparameters at the lower bound
        if x1 == 0: x1 = 0.001
        if x2 == 0: x2 = 0.001
        if x3 == 0: x3 = 0.001

        model_svr = SVR(C=x1, epsilon=x2, gamma=x3)
        model_svr.fit(x_train, y_train.ravel())  # SVR expects a 1-D target array
        # predict() returns a 1-D array; inverse_transform needs a 2-D one
        predict_results = std_y.inverse_transform(model_svr.predict(x_test).reshape(-1, 1))

        print("R2 = ", metrics.r2_score(y_test, predict_results))
        return metrics.r2_score(y_test, predict_results)

    def update_operator(self, pop_size):
        """
        Update operator: compute the next velocity and position of each particle.
        """
        c1 = 2   # learning factors, conventionally 2
        c2 = 2
        w = 0.4  # inertia weight
        for i in range(pop_size):
            # update velocity
            self.pop_v[i] = w * self.pop_v[i] + c1 * random.uniform(0, 1) * (
                    self.p_best[i] - self.pop_x[i]) + c2 * random.uniform(0, 1) * (self.g_best - self.pop_x[i])
            # update position
            self.pop_x[i] = self.pop_x[i] + self.pop_v[i]
            # clamp to bounds
            for j in range(self.var_num):
                if self.pop_x[i][j] < self.bound[0][j]:
                    self.pop_x[i][j] = self.bound[0][j]
                if self.pop_x[i][j] > self.bound[1][j]:
                    self.pop_x[i][j] = self.bound[1][j]
            # update p_best and g_best
            if self.fitness(self.pop_x[i]) > self.fitness(self.p_best[i]):
                self.p_best[i] = self.pop_x[i]
            if self.fitness(self.pop_x[i]) > self.fitness(self.g_best):
                self.g_best = self.pop_x[i].copy()  # copy to avoid aliasing the particle's row

    def main(self):
        popobj = []
        self.ng_best = np.zeros((1, self.var_num))[0]
        for gen in range(self.NGEN):
            self.update_operator(self.pop_size)
            popobj.append(self.fitness(self.g_best))
            print('############ Generation {} ############'.format(str(gen + 1)))
            if self.fitness(self.g_best) > self.fitness(self.ng_best):
                self.ng_best = self.g_best.copy()
            print('Best position: {}'.format(self.ng_best))
            print('Best fitness: {}'.format(self.fitness(self.ng_best)))
        print("---- End of (successful) Searching ----")


        plt.figure()
        plt.title("PSO-SVM")
        plt.xlabel("GENS", size=14)
        plt.ylabel("R2", size=14)
        plt.plot(range(self.NGEN), popobj, 'b', linewidth=2)
        plt.show()

if __name__ == '__main__':
    NGEN = 200
    popsize = 20
    low = [0, 0.01, 0]    # lower bounds for C, epsilon, gamma
    up = [200, 0.01, 50]  # upper bounds (epsilon is pinned at 0.01 here)
    parameters = [NGEN, popsize, low, up]
    pso = PSO(parameters)
    pso.main()
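As claimed in the introduction, the wrapper can drive other models: override fitness with any objective of the decision variables. A minimal sketch, assuming nothing beyond the PSO class above (the subclass name and the sphere-style toy objective are illustrative, not part of the original code):

class PSOCustom(PSO):
    def fitness(self, ind_var):
        # any maximization objective works; this toy peaks at the origin
        return -sum(v ** 2 for v in ind_var)

pso = PSOCustom([50, 20, [-5, -5, -5], [5, 5, 5]])  # [NGEN, pop_size, lower, upper]
pso.main()

Note that the class maximizes fitness, so a smaller-is-better objective should be negated, as done here.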

Genetic Algorithm (GA)

The Genetic Algorithm (GA) is an optimization algorithm inspired by biological evolution. It simulates natural evolution, searching for solutions through inheritance, mutation, and selection within a population.

Advantages

  1. Can search complex problem spaces: GAs apply to many problem types, including continuous, discrete, integer, and mixed problems, and can find the global optimum or near-optimal solutions in complex spaces.
  2. Highly parallelizable: fitness evaluations are independent, so multiple processors or machines can be used to speed up the search.
  3. Independent of the problem's specific form: a GA only needs a solution encoding and a fitness function, so it applies to a wide range of problems.
  4. Handles multi-objective problems: with multiple fitness functions and suitable selection strategies, the population can maintain several non-dominated solutions.

Disadvantages

  1. Relatively slow: GAs require a large number of evaluations, so they are slow on large-scale optimization problems.
  2. Hard to tune: the crossover rate, mutation rate, and other parameters must be chosen by experience and experiment, and they strongly affect performance.
  3. Needs a suitable encoding: the solution encoding strongly affects performance; a poor encoding can prevent convergence or slow it down badly.
  4. May get stuck in local optima: as a heuristic, a GA can converge to a local optimum rather than the global one, especially with poorly chosen parameters.

Implementation

import random
import copy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn import metrics


# Fitness function: despite the name, this returns R2, not MSE
def msefunc(predictval, realval):
    print("R2 = ", metrics.r2_score(realval, predictval))
    return metrics.r2_score(realval, predictval)


# Objective: train an SVR with the candidate hyperparameters and
# return its fitness on the held-out test split
def SVMResult(vardim, x, bound):
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # features
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # targets
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)

    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)

    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)

    c = x[0]
    e = x[1]
    g = x[2]

    model_svr = SVR(C=c, epsilon=e, gamma=g)
    model_svr.fit(x_train, y_train.ravel())  # SVR expects a 1-D target array
    # predict() returns a 1-D array; inverse_transform needs a 2-D one
    predict_results = std_y.inverse_transform(model_svr.predict(x_test).reshape(-1, 1))
    return msefunc(predict_results, y_test)



class GAIndividual:
    '''
    individual of genetic algorithm
    '''

    def __init__(self, vardim, bound):
        '''
        vardim: dimension of variables
        bound: boundaries of variables
        '''
        self.vardim = vardim
        self.bound = bound
        self.fitness = 0.

    def generate(self):
        '''
        generate a random chromosome for the genetic algorithm
        '''
        rnd = np.random.random(size=self.vardim)
        self.chrom = np.zeros(self.vardim)
        for i in range(0, self.vardim):
            self.chrom[i] = self.bound[0, i] + \
                            (self.bound[1, i] - self.bound[0, i]) * rnd[i]

    def calculateFitness(self):
        '''
        calculate the fitness of the chromosome
        '''
        self.fitness = SVMResult(self.vardim, self.chrom, self.bound)




class GeneticAlgorithm:
    '''
    The class for genetic algorithm
    '''

    def __init__(self, sizepop, vardim, bound, MAXGEN, params):
        '''
        sizepop: population size
        vardim: dimension of variables
        bound: boundaries of variables
        MAXGEN: termination condition (maximum number of generations)
        params: algorithm parameters, a list of [crossover rate, mutation rate, alpha]
        '''
        self.sizepop = sizepop
        self.MAXGEN = MAXGEN
        self.vardim = vardim
        self.bound = bound
        self.population = []
        self.fitness = np.zeros((self.sizepop, 1))
        self.trace = np.zeros((self.MAXGEN, 3))  # per generation: best-so-far, average, max
        self.params = params

    def initialize(self):
        '''
        initialize the population
        '''
        for i in range(0, self.sizepop):
            ind = GAIndividual(self.vardim, self.bound)
            ind.generate()
            self.population.append(ind)

    def evaluate(self):
        '''
        evaluation of the population fitnesses
        '''
        for i in range(0, self.sizepop):
            self.population[i].calculateFitness()
            self.fitness[i] = self.population[i].fitness

    def solve(self):
        '''
        evolution process of genetic algorithm
        '''
        self.t = 0
        self.initialize()
        self.evaluate()
        best = np.max(self.fitness)
        bestIndex = np.argmax(self.fitness)
        self.best = copy.deepcopy(self.population[bestIndex])
        self.avefitness = np.mean(self.fitness)
        self.maxfitness = np.max(self.fitness)

        self.trace[self.t, 0] = self.best.fitness
        self.trace[self.t, 1] = self.avefitness
        self.trace[self.t, 2] = self.maxfitness
        print("Generation %d: optimal function value is: %f; average function value is %f;max function value is %f" % (
            self.t, self.trace[self.t, 0], self.trace[self.t, 1], self.trace[self.t, 2]))
        while (self.t < self.MAXGEN - 1):
            self.t += 1
            self.selectionOperation()
            self.crossoverOperation()
            self.mutationOperation()
            self.evaluate()
            best = np.max(self.fitness)
            bestIndex = np.argmax(self.fitness)
            if best > self.best.fitness:
                self.best = copy.deepcopy(self.population[bestIndex])
            self.avefitness = np.mean(self.fitness)
            self.maxfitness = np.max(self.fitness)

            self.trace[self.t, 0] = self.best.fitness
            self.trace[self.t, 1] = self.avefitness
            self.trace[self.t, 2] = self.maxfitness
            print(
                "Generation %d: optimal function value is: %f; average function value is: %f; max function value is: %f" % (
                    self.t, self.trace[self.t, 0], self.trace[self.t, 1], self.trace[self.t, 2]))

        print("Optimal function value is: %f; " %
              self.trace[self.t, 0])
        print("Optimal solution is:")
        print(self.best.chrom)
        self.printResult()

    def selectionOperation(self):
        '''
        selection operation for Genetic Algorithm
        '''
        newpop = []
        totalFitness = np.sum(self.fitness)
        accuFitness = np.zeros((self.sizepop, 1))

        sum1 = 0.
        for i in range(0, self.sizepop):
            accuFitness[i] = sum1 + self.fitness[i] / totalFitness
            sum1 = accuFitness[i]

        for i in range(0, self.sizepop):
            r = random.random()
            idx = 0
            for j in range(0, self.sizepop - 1):
                if j == 0 and r < accuFitness[j]:
                    idx = 0
                    break
                elif r >= accuFitness[j] and r < accuFitness[j + 1]:
                    idx = j + 1
                    break
            newpop.append(self.population[idx])
        self.population = newpop

    def crossoverOperation(self):
        '''
        crossover operation for genetic algorithm (arithmetic crossover)
        '''
        newpop = []
        for i in range(0, self.sizepop, 2):
            idx1 = random.randint(0, self.sizepop - 1)
            idx2 = random.randint(0, self.sizepop - 1)
            while idx2 == idx1:
                idx2 = random.randint(0, self.sizepop - 1)
            newpop.append(copy.deepcopy(self.population[idx1]))
            newpop.append(copy.deepcopy(self.population[idx2]))
            r = random.random()
            if r < self.params[0]:
                crossPos = random.randint(1, self.vardim - 1)
                alpha = self.params[2]
                for j in range(crossPos, self.vardim):
                    # keep the original genes so both children blend the parents' values
                    gene1 = newpop[i].chrom[j]
                    gene2 = newpop[i + 1].chrom[j]
                    newpop[i].chrom[j] = gene1 * alpha + (1 - alpha) * gene2
                    newpop[i + 1].chrom[j] = gene2 * alpha + (1 - alpha) * gene1
        self.population = newpop

    def mutationOperation(self):
        '''
        mutation operation for genetic algorithm (non-uniform mutation)
        '''
        newpop = []
        for i in range(0, self.sizepop):
            newpop.append(copy.deepcopy(self.population[i]))
            r = random.random()
            if r < self.params[1]:
                mutatePos = random.randint(0, self.vardim - 1)
                theta = random.random()
                gene = newpop[i].chrom[mutatePos]
                scale = 1 - random.random() ** (1 - self.t / self.MAXGEN)
                if theta > 0.5:
                    # shrink toward the lower bound
                    newpop[i].chrom[mutatePos] = gene - (gene - self.bound[0, mutatePos]) * scale
                else:
                    # grow toward the upper bound
                    newpop[i].chrom[mutatePos] = gene + (self.bound[1, mutatePos] - gene) * scale
        self.population = newpop

    def printResult(self):
        '''
        plot the max fitness per generation
        '''
        x = np.arange(0, self.MAXGEN)
        y3 = self.trace[:, 2]  # column 2 of the trace: max fitness

        plt.figure()
        plt.title("GA-SVM")
        plt.xlabel("GENS", size=14)
        plt.ylabel("R2", size=14)
        plt.plot(x, y3, 'b', linewidth=2)
        plt.show()

# Generation 19: optimal function value is: 0.796173; average function value is 0.655682;max function value is 0.710410
# Optimal function value is: 0.796173;
# Optimal solution is:
# [ 6.73356933  0.29407187 48.58751446]


if __name__ == "__main__":
   bound = np.array([[0,0.01,0],[200,0.01,50]])
   ga = GeneticAlgorithm(20, 3, bound, 3, [0.7, 0.0175, 0.5])
   ga.solve()
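The GA is wired to SVMResult at module level, so pointing it at a different objective is a one-function change. A minimal sketch (the toy objective is illustrative only; it is kept strictly positive because the roulette-wheel selection above assumes positive fitness values):

def SVMResult(vardim, x, bound):
    # toy objective in (0, 1], maximized at the origin
    return 1.0 / (1.0 + np.sum(np.asarray(x) ** 2))

bound = np.array([[-5.0, -5.0, -5.0], [5.0, 5.0, 5.0]])
ga = GeneticAlgorithm(20, 3, bound, 10, [0.7, 0.0175, 0.5])
ga.solve()

Redefining SVMResult in the same module is enough, since GAIndividual.calculateFitness resolves the name at call time.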

Cuckoo Search (CS)

Cuckoo Search (CS) is a heuristic algorithm inspired by the brood-parasitic nesting behavior of cuckoos. It is used for optimization problems, both continuous and discrete.
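In the implementation below, each nest $x_i$ takes a Lévy-flight step around the current best nest and the new position is kept only if it improves fitness:

$$x_i \leftarrow x_i + \alpha\, s_i \,(x_i - x_{best})\, r, \qquad s_i \sim \mathrm{Levy}(\beta),\; r \sim U(0,1)^m$$

where $\alpha$ is the step_size argument and $\beta$ the Lévy exponent (1.5 by default). In addition, each nest is abandoned with probability pa and rebuilt near a random pair of existing nests.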

Advantages

  1. Global search capability: CS explores the solution space through randomly generated nests and can locate the global optimum across the entire search space.

  2. Simple to implement: compared with other optimizers, CS is relatively easy to implement. Its core idea is to simulate the cuckoo's nesting behavior, so only a fitness function and an update strategy need to be defined.

  3. Efficient search: because CS samples new nests stochastically, it can reach good solutions in relatively little time, which makes it well suited to high-dimensional, complex problems.

Disadvantages

  1. Slow convergence: CS tends to get stuck in local optima during the search, and its convergence is comparatively slow, because the search relies on randomly generated nests without extra heuristic guidance.

  2. Difficult parameter selection: CS has several important parameters, such as the number of nests and the discovery probability, that strongly affect performance, and choosing good values remains a challenge.

  3. High modeling demands: the optimization problem must be formalized mathematically and a fitness function defined, which can be hard for some complex problems.

Implementation

import numpy as np
import scipy.special as sc_special
import matplotlib.pyplot as plt            # pyplot: quick 2-D plotting
from matplotlib import cm                   # colormaps
from mpl_toolkits.mplot3d import Axes3D     # 3-D plotting
"""
    Cuckoo search function
    ---------------------------------------------------
    Input parameters:
        n: number of nests
        m: number of dimensions
        fit_func: fitness function
        lower_boundary: lower bounds
        upper_boundary: upper bounds
        iter_num: number of iterations (default: 100)
        pa: probability that a host discovers an alien egg (default: 0.25)
        beta: Lévy exponent (note: 1 < beta < 2) (default: 1.5)
        step_size: step-size scale factor relative to the problem scale (default: 0.1)
    Output:
        the best solution and its value
"""

# plot the objective surface
def plot_3d(ax):
    x = np.arange(-3, 3, 0.1)  # evenly spaced samples over the interval
    y = np.arange(-3, 3, 0.1)
    x, y = np.meshgrid(x, y)
    z = 3*(1-x)**2*np.e**(-x**2-(y+1)**2) - 10*(x/5-x**3-y**5)*np.e**(-x**2-y**2) - (np.e**(-(x+1)**2-y**2))/3
    ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap=cm.coolwarm)  # rstride/cstride: row/column strides; cmap: colormap
    ax.set_zlim(-10, 10)  # z-axis limits
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.pause(3)   # short delay so the surface stays visible
    plt.show()

def fit_func(nest):
    x, y = nest
    return 3*(1-x)**2*np.e**(-x**2-(y+1)**2) - 10*(x/5-x**3-y**5)*np.e**(-x**2-y**2) - (np.e**(-(x+1)**2-y**2))/3

# evaluate the fitness of every nest
def calc_fitness(fit_func, nests):
    n, m = nests.shape
    fitness = np.empty(n)

    for each_nest in range(n):
        fitness[each_nest] = fit_func(nests[each_nest])

    return fitness

# main loop
def cuckoo_search(n, m, fit_func, lower_boundary, upper_boundary, iter_num=100, pa=0.25, beta=1.5, step_size=0.1):
    # generate the initial nest positions
    nests = generate_nests(n, m, lower_boundary, upper_boundary)
    fitness = calc_fitness(fit_func, nests)
    # find and record the best nest
    best_nest_index = np.argmax(fitness)
    best_fitness = fitness[best_nest_index]
    best_nest = nests[best_nest_index].copy()

    for _ in range(iter_num):
        nests = update_nests(fit_func, lower_boundary, upper_boundary, nests, best_nest, fitness, step_size)
        nests = abandon_nests(nests, lower_boundary, upper_boundary, pa)
        fitness = calc_fitness(fit_func, nests)

        # scatter the current nests on the surface (uses the module-level `ax`;
        # the original hard-coded range(20), which broke for n != 20)
        x = nests[:, 0].reshape(-1, 1)
        y = nests[:, 1].reshape(-1, 1)
        z = 3*(1-x)**2*np.e**(-x**2-(y+1)**2) - 10*(x/5-x**3-y**5)*np.e**(-x**2-y**2) - (np.e**(-(x+1)**2-y**2))/3
        if 'sca' in locals():
            sca.remove()
        sca = ax.scatter(x, y, z, c='black', marker='o')
        plt.show()
        plt.pause(0.1)

        max_nest_index = np.argmax(fitness)
        max_fitness = fitness[max_nest_index]
        max_nest = nests[max_nest_index]

        if max_fitness > best_fitness:
            best_nest = max_nest.copy()
            best_fitness = max_fitness

    return (best_nest, best_fitness)


# generate initial nest positions
def generate_nests(n, m, lower_boundary, upper_boundary):
    lower_boundary = np.array(lower_boundary)  # convert to arrays
    upper_boundary = np.array(upper_boundary)
    nests = np.empty((n, m))                   # n rows, m columns

    for each_nest in range(n):
        # sample each nest uniformly within the bounds, giving an n x m matrix
        nests[each_nest] = lower_boundary + np.array([np.random.rand() for _ in range(m)]) * (upper_boundary - lower_boundary)

    return nests


# generate new candidate nests and keep whichever of old/new is better
def update_nests(fit_func, lower_boundary, upper_boundary, nests, best_nest, fitness, step_coefficient):
    lower_boundary = np.array(lower_boundary)
    upper_boundary = np.array(upper_boundary)
    n, m = nests.shape
    # generate step lengths via Lévy flight
    steps = levy_flight(n, m, 1.5)
    new_nests = nests.copy()

    for each_nest in range(n):
        # a small step_coefficient keeps the Lévy flights from becoming too aggressive,
        # and the factor (nests[each_nest] - best_nest) leaves the best nest unchanged
        step_size = step_coefficient * steps[each_nest] * (nests[each_nest] - best_nest)
        step_direction = np.random.rand(m)
        new_nests[each_nest] += step_size * step_direction
        # apply boundary conditions
        new_nests[each_nest][new_nests[each_nest] < lower_boundary] = lower_boundary[new_nests[each_nest] < lower_boundary]
        new_nests[each_nest][new_nests[each_nest] > upper_boundary] = upper_boundary[new_nests[each_nest] > upper_boundary]

    new_fitness = calc_fitness(fit_func, new_nests)
    nests[new_fitness > fitness] = new_nests[new_fitness > fitness]

    return nests


# abandon some nests with probability pa and build new ones nearby
def abandon_nests(nests, lower_boundary, upper_boundary, pa):
    lower_boundary = np.array(lower_boundary)
    upper_boundary = np.array(upper_boundary)
    n, m = nests.shape
    for each_nest in range(n):
        if np.random.rand() < pa:
            step_size = np.random.rand() * (nests[np.random.randint(0, n)] - nests[np.random.randint(0, n)])
            nests[each_nest] += step_size
            # apply boundary conditions
            nests[each_nest][nests[each_nest] < lower_boundary] = lower_boundary[nests[each_nest] < lower_boundary]
            nests[each_nest][nests[each_nest] > upper_boundary] = upper_boundary[nests[each_nest] > upper_boundary]

    return nests


# draw Lévy-flight step lengths (Mantegna's algorithm)
def levy_flight(n, m, beta):
    sigma_u = (sc_special.gamma(1+beta)*np.sin(np.pi*beta/2)/(sc_special.gamma((1+beta)/2)*beta*(2**((beta-1)/2))))**(1/beta)
    sigma_v = 1

    u = np.random.normal(0, sigma_u, (n, m))
    v = np.random.normal(0, sigma_v, (n, m))

    steps = u/((np.abs(v))**(1/beta))  # steps is an n x m matrix

    return steps
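For reference, the sigma_u above is the scale from Mantegna's algorithm for simulating Lévy-stable steps $s = u / |v|^{1/\beta}$:

$$\sigma_u = \left( \frac{\Gamma(1+\beta)\,\sin(\pi\beta/2)}{\Gamma\big(\tfrac{1+\beta}{2}\big)\,\beta\,2^{(\beta-1)/2}} \right)^{1/\beta}, \qquad u \sim N(0, \sigma_u^2),\quad v \sim N(0, 1)$$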

# entry point
if __name__ == '__main__':
    fig = plt.figure()  # background figure
    ax = Axes3D(fig)    # on newer matplotlib, fig.add_subplot(projection='3d') is preferred
    plt.ion()           # interactive mode: plt.show() does not block
    plot_3d(ax)

    best_nest, best_fitness = cuckoo_search(25, 2, fit_func, [-3, -3], [3, 3], step_size=0.4)

    print('Maximum value: %.5f, attained at (%.5f, %.5f)' % (best_fitness, best_nest[0], best_nest[1]))
    plt.ioff()          # turn interactive mode off
    plot_3d(ax)
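The demo above maximizes a 2-D test surface, but the same cuckoo_search can tune the three SVR hyperparameters by swapping in an SVR objective like the ones used in the PSO/GA sections. A sketch under two assumptions: the 2-D scatter block inside cuckoo_search is removed or guarded (it assumes m = 2 and a module-level ax), and the data files are the same ones used throughout this article:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn import metrics

data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')
target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')
x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
std_x = StandardScaler()
x_train = std_x.fit_transform(x_train)
x_test = std_x.transform(x_test)
std_y = StandardScaler()
y_train = std_y.fit_transform(y_train)
y_test = std_y.inverse_transform(std_y.transform(y_test))  # round trip keeps y_test as an array

def svr_fitness(nest):
    c, e, g = nest  # C, epsilon, gamma
    model = SVR(C=max(c, 0.001), epsilon=max(e, 0.001), gamma=max(g, 0.001))
    model.fit(x_train, y_train.ravel())
    pred = std_y.inverse_transform(model.predict(x_test).reshape(-1, 1))
    return metrics.r2_score(y_test, pred)

best_params, best_r2 = cuckoo_search(25, 3, svr_fitness, [0.001, 0.01, 0.001], [200, 0.01, 50])
print(best_params, best_r2)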

Support Vector Regression (SVR)

Support Vector Regression (SVR) is a regression method based on support vector machines. Unlike conventional regression, SVR does not simply fit an optimal linear or nonlinear regression equation; it seeks an optimal hyperplane such that all data points lie within a predefined tolerance of it.
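That predefined tolerance is the ε of the ε-insensitive loss: residuals inside the ε-tube cost nothing, and only points outside it shape the fit. This is the epsilon hyperparameter tuned by CS/GA/PSO above:

$$L_\varepsilon\big(y, f(x)\big) = \max\big(0,\; |y - f(x)| - \varepsilon\big)$$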

Advantages

  1. Strong nonlinear modeling: different kernel functions map the input space into a high-dimensional feature space, which makes nonlinear regression problems tractable.
  2. Robust to outliers: because the objective maximizes the margin, SVR tolerates outliers to a degree instead of being dominated by them.
  3. Handles many features: SVR copes with multi-dimensional inputs and is comparatively resistant to overfitting.

Disadvantages

  1. Parameter dependence: a suitable kernel and hyperparameters must be chosen; poor choices degrade performance.
  2. High computational cost: training complexity grows with the number of samples, so training on large datasets is slow.
  3. Sensitive to missing data: missing values require extra preprocessing before training.
  4. Limited interpretability: SVR is essentially a black-box model, so its predictions are hard to explain.

Implementation

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn import metrics

def linear2():
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # features
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # targets
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)

    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)

    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)

    # hyperparameters found by each optimizer; uncomment the one to evaluate
    # model_svr = SVR(C=1, epsilon=0.1, gamma=10)                     # baseline
    # model_svr = SVR(C=148.990417, epsilon=0.01, gamma=0.616337768)  # GA
    # model_svr = SVR(C=16.81477247, epsilon=0.01, gamma=0.46380986)  # CS
    model_svr = SVR(C=17.0006218, epsilon=0.01, gamma=0.462968607)    # PSO
    model_svr.fit(x_train, y_train.ravel())  # SVR expects a 1-D target array
    # predict() returns a 1-D array; inverse_transform needs a 2-D one
    predict_results = std_y.inverse_transform(model_svr.predict(x_test).reshape(-1, 1))

    error1 = metrics.r2_score(y_test, predict_results)
    MAPE = metrics.mean_absolute_percentage_error(y_test, predict_results)
    RMSE = metrics.mean_squared_error(y_test, predict_results) ** 0.5
    print("R squared:\n", error1)
    print("MAPE:\n", MAPE)
    print("RMSE:\n", RMSE)

    # predicted vs. actual scatter with the y = x reference line
    plt.scatter(y_test, predict_results)
    plt.plot([0, 500], [0, 500], '--', color='r')
    plt.show()





if __name__ == '__main__':
    linear2()

Ridge Regression

Ridge regression is a machine-learning algorithm for addressing multicollinearity in linear regression.
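Concretely, it minimizes the least-squares loss plus an L2 penalty, where $\alpha$ (the alpha argument of sklearn's Ridge used below) controls how strongly the coefficients are shrunk:

$$\min_w\; \lVert y - Xw \rVert_2^2 + \alpha \lVert w \rVert_2^2$$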

Advantages

  1. Handles multicollinearity well: when features are highly correlated, ridge regression reduces the variance of the coefficient estimates, improving stability and generalization.
  2. L2 regularization guards against overfitting: penalizing the coefficient magnitudes keeps the model from fitting the training data too closely, improving generalization.
  3. Good small-sample performance: shrinking the coefficients lowers model complexity, so reasonable performance is attainable even with little data.

Disadvantages

  1. No automatic choice of penalty strength: the regularization coefficient must be tuned by hand, which takes experience and trial and error to get right.
  2. Expensive on large datasets: estimating the coefficients by solving the (regularized) normal equations is costly for large matrices, and more efficient algorithms may be needed at scale.
  3. Weak at feature selection: because ridge penalizes all coefficients without zeroing any out, irrelevant features still end up with sizeable coefficients; for feature selection, other methods tend to do better.

Implementation

import time

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn import metrics
def linear3():
    # ridge regression
    start = time.time()
    # load the data
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # features
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # targets
    # split into train and test sets
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    # standardize the features
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # standardize the targets
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)
    # estimator
    estimator = Ridge(alpha=0.99244312)
    estimator.fit(x_train, y_train)
    # inspect the fitted model if needed
    # print("ridge coefficients:\n", estimator.coef_)
    # print("ridge intercept:\n", estimator.intercept_)
    # evaluate the model
    y_predict = std_y.inverse_transform(estimator.predict(x_test))
    end = time.time()
    # R^2 measures goodness of fit
    error1 = r2_score(y_test, y_predict)
    print("R squared:\n", error1)
    # MAPE and RMSE measure prediction error
    MAPE = metrics.mean_absolute_percentage_error(y_test, y_predict)
    RMSE = metrics.mean_squared_error(y_test, y_predict) ** 0.5
    print("MAPE:\n", MAPE)
    print("RMSE:\n", RMSE)
    # print("elapsed time: {}".format(end - start))
    return None

if __name__ == '__main__':
    linear3()

Normal Equation

The normal equation is a method for solving linear least-squares problems. It finds the error-minimizing parameter estimate by solving the equation obtained by setting the derivative of the loss to zero.
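Setting the gradient of the squared error to zero yields the closed-form estimate

$$\hat{w} = (X^\top X)^{-1} X^\top y,$$

which is what sklearn's LinearRegression below computes in effect (internally it uses a numerically stabler least-squares solver rather than an explicit matrix inverse).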

Advantages

  1. Solid theoretical footing: the normal equation gives the closed-form least-squares estimate obtained by setting the derivative to zero.
  2. Exact solution: it produces the exact least-squares estimate without iteration, so for moderate problem sizes it is comparatively fast.
  3. Interpretability: the analytic solution makes the fitted parameters easy to inspect and explain.

Disadvantages

  1. High computational complexity: solving the normal equation requires inverting a matrix, which is expensive in time and memory for large problems.
  2. Invertibility issues: when $X^\top X$ is singular or nearly so, solving the normal equation can fail.
  3. Noise sensitivity: the least-squares estimate is sensitive to noise in the data.

Implementation

import time

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn import metrics
def linear1():
    start = time.time()
    # normal equation
    # load the data
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # features
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # targets
    # split into train and test sets
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    # standardize the features
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # standardize the targets
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)
    # estimator
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)
    # inspect the fitted model if needed
    # print("normal-equation coefficients:\n", estimator.coef_)
    # print("normal-equation intercept:\n", estimator.intercept_)
    # evaluate the model
    y_predict = std_y.inverse_transform(estimator.predict(x_test))
    end = time.time()
    # R^2 measures goodness of fit
    error1 = r2_score(y_test, y_predict)
    print("R squared:\n", error1)
    # MAPE and RMSE measure prediction error
    MAPE = metrics.mean_absolute_percentage_error(y_test, y_predict)
    RMSE = metrics.mean_squared_error(y_test, y_predict) ** 0.5
    print("MAPE:\n", MAPE)
    print("RMSE:\n", RMSE)
    # print("elapsed time: {}".format(end - start))
    return None

if __name__ == '__main__':
    linear1()

Stochastic Gradient Descent

Stochastic Gradient Descent (SGD) is a widely used optimization algorithm for training model parameters in machine learning.
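Each step updates the parameters using the gradient of the loss on a single randomly chosen sample $i$, with learning rate $\eta$:

$$w \leftarrow w - \eta\, \nabla_w L_i(w)$$

(sklearn's SGDRegressor used below additionally includes a regularization term whose strength is its alpha parameter.)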

Advantages

  1. Fast: compared with batch gradient descent, SGD updates the parameters from a single sample at a time, so each step is much cheaper.
  2. Low memory footprint: only one sample is processed per update, which suits large datasets.
  3. Suited to online learning: the model can be updated as new samples arrive.
  4. Usable for non-convex optimization: SGD often works well on non-convex problems too.

Disadvantages

  1. Instability: single-sample updates make the optimization trajectory noisy, and the loss may oscillate.
  2. May converge to local optima: because updates follow randomly chosen samples, SGD can settle in a local rather than the global optimum.
  3. Learning-rate sensitivity: SGD is very sensitive to the learning rate, and a suitable value is needed for fast convergence.
  4. Requires tuning: applying SGD involves adjusting several parameters, such as the learning rate and the regularization strength.

Implementation

import time

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score
from sklearn import metrics

def linear2():
    # stochastic gradient descent
    start = time.time()
    # load the data
    data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')    # features
    target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')  # targets
    # split into train and test sets
    x_train, x_test, y_train, y_test = train_test_split(data, target, random_state=22, test_size=0.25)
    # standardize the features
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # standardize the targets
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train)
    y_test = std_y.transform(y_test)
    y_test = std_y.inverse_transform(y_test)
    # estimator
    estimator = SGDRegressor(alpha=0.055935631, max_iter=1000)
    estimator.fit(x_train, y_train.ravel())  # SGDRegressor expects a 1-D target array
    # inspect the fitted model if needed
    # print("SGD coefficients:\n", estimator.coef_)
    # print("SGD intercept:\n", estimator.intercept_)
    # evaluate the model; predict() returns a 1-D array, inverse_transform needs 2-D
    y_predict = std_y.inverse_transform(estimator.predict(x_test).reshape(-1, 1))
    end = time.time()
    # R^2 measures goodness of fit
    error1 = r2_score(y_test, y_predict)
    print("R squared:\n", error1)
    # MAPE and RMSE measure prediction error
    MAPE = metrics.mean_absolute_percentage_error(y_test, y_predict)
    RMSE = metrics.mean_squared_error(y_test, y_predict) ** 0.5
    print("MAPE:\n", MAPE)
    print("RMSE:\n", RMSE)
    # print("elapsed time: {}".format(end - start))
    return None

if __name__ == '__main__':
    linear2()