首先定义遗传算法(GA)。
染色体编码部分,实际上就是种群个体初始化。
def encode(pop, dim, ub, lb):
    '''Initialise the population (chromosome "encoding").

    Each of the `pop` individuals gets `dim` genes; gene j is drawn
    uniformly from [lb[j], ub[j]] and rounded to the nearest integer.
    Returns a (pop, dim) array.
    '''
    chromosomes = np.zeros([pop, dim])
    for row in range(pop):
        for col in range(dim):
            span = ub[col] - lb[col]
            chromosomes[row, col] = np.round(span * np.random.random() + lb[col])
    return chromosomes
染色体解码部分,将染色体的编码值按上下界线性映射回原始取值区间(并非转换成二进制)。
def decode(X, ub, lb):
    '''Chromosome decoding.

    Treats each gene as a 16-bit code (0 .. 2**16 - 1) and linearly maps
    it back to a real value in [lb[j], ub[j]].  Returns a new array of
    the same shape as X.
    '''
    pop, dim = X.shape
    decoded = np.zeros([pop, dim])
    max_code = 2 ** 16 - 1  # largest value representable in 16 bits
    for row in range(pop):
        for col in range(dim):
            fraction = X[row, col] / max_code
            decoded[row, col] = fraction * (ub[col] - lb[col]) + lb[col]
    return decoded
定义适应度函数。
def fitness_evaluation(X, fun):
    '''Evaluate `fun` on every individual (row) of X.

    Returns a 1-D float array of length X.shape[0] with one fitness
    value per individual.
    '''
    fitness = np.zeros(X.shape[0])
    for idx, individual in enumerate(X):
        fitness[idx] = fun(individual)
    return fitness
定义染色体的选择操作,采用轮盘赌策略选择个体。
def selection(X, fitness):
    '''Roulette-wheel selection for a MINIMISATION GA.

    Bug fix: the original weighted individuals by raw fitness, which in a
    minimisation setting preferentially selected the WORST individuals
    (and, after subtracting the minimum, gave the best individual zero
    probability).  Weights are now `max(fitness) - fitness`, so a smaller
    (better) fitness yields a larger selection probability.  Non-finite
    fitness values are treated as the worst finite value (weight 0).

    The caller's fitness array is no longer mutated in place.
    Returns (selected_X, selected_fitness), both of length X.shape[0].
    '''
    fitness = np.asarray(fitness, dtype=float).copy()  # never mutate the caller's array
    finite = np.isfinite(fitness)
    if finite.any():
        worst = np.max(fitness[finite])
        fitness[~finite] = worst            # inf/NaN individuals get zero weight
        weights = worst - fitness           # smaller fitness -> larger weight
    else:
        weights = np.ones_like(fitness)     # no usable information: uniform
    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        # All individuals equally fit (or degenerate weights): select uniformly.
        ratio = np.ones_like(weights) / len(weights)
    else:
        ratio = weights / total
    selected_idx = np.random.choice(X.shape[0], X.shape[0], replace=True, p=ratio)
    return X[selected_idx, :], fitness[selected_idx]
染色体的交叉操作。
def crossover(X, pc):
    '''Single-point crossover on consecutive pairs, applied with probability pc.

    For each pair (2i, 2i+1) a cut point inside the chromosome is drawn
    and the two tails are swapped.  Parents are read from the original
    array, so the swap is symmetric.  Returns the offspring array.
    '''
    pop, dim = X.shape
    children = copy.deepcopy(X)
    for left in range(0, pop - 1, 2):
        if np.random.random() >= pc:
            continue  # this pair passes through unchanged
        right = left + 1
        point = np.random.randint(1, dim)  # cut strictly inside the chromosome
        children[left, point:] = X[right, point:]
        children[right, point:] = X[left, point:]
    return children
染色体的变异操作。
def mutation(X, pm, ub, lb):
    '''Gene-wise mutation.

    Each gene is independently reset, with probability pm, to a fresh
    rounded uniform draw from [lb[j], ub[j]] (same scheme as the initial
    encoding).  Returns the mutated copy; X itself is left untouched.
    '''
    pop, dim = X.shape
    mutated = copy.deepcopy(X)
    for row in range(pop):
        for col in range(dim):
            if np.random.random() < pm:
                fresh = (ub[col] - lb[col]) * np.random.random() + lb[col]
                mutated[row, col] = np.round(fresh)
    return mutated
定义遗传算法的主体函数,返回array数组。
def GA(pop, dim, T, ub, lb, fun, info=False):
    '''Genetic algorithm minimising `fun`.

    Bug fix: the original called an undefined `generate_population`; the
    initialiser defined in this file is `encode`.

    Parameters:
        pop  - population size
        dim  - number of genes per individual
        T    - number of generations
        ub/lb- per-gene upper/lower bounds (indexable, length dim)
        fun  - fitness function mapping a 1-D gene vector to a scalar
        info - print per-generation progress when True

    Returns (best score, best position, score curve, best-position curve)
    as numpy arrays.
    '''
    X = encode(pop, dim, ub, lb)           # initial population (was: undefined generate_population)
    fitness = fitness_evaluation(X, fun)   # fitness of the initial population
    GbestIndex = np.argmin(fitness)        # index of the global best individual
    Gbest = copy.deepcopy(X[GbestIndex, :])
    GbestScore = fitness[GbestIndex]
    curve = np.zeros(T)                    # best-score history
    Gbestcurve = []                        # best-position history
    for t in range(T):
        selected_X, selected_fitness = selection(X, fitness)     # selection
        offspring_X = crossover(selected_X, 0.8)                 # crossover, pc = 0.8
        offspring_X = mutation(offspring_X, 0.1, ub, lb)         # mutation, pm = 0.1
        offspring_fitness = fitness_evaluation(offspring_X, fun)
        # Merge parents and offspring, then keep the best `pop` (elitism).
        X = np.vstack((X, offspring_X))
        fitness = np.hstack((fitness, offspring_fitness))
        sort_idx = np.argsort(fitness)
        X = X[sort_idx[:pop], :]
        fitness = fitness[sort_idx[:pop]]
        if np.min(fitness) < GbestScore:   # update the global best
            GbestIndex = np.argmin(fitness)
            Gbest = copy.deepcopy(X[GbestIndex, :])
            GbestScore = fitness[GbestIndex]
        curve[t] = GbestScore
        Gbestcurve.append(Gbest)
        if info:
            print(f'第{t}次迭代,当前最优适应度和最优个体位置为:',GbestScore,Gbest)
    return np.array(GbestScore), np.array(Gbest), np.array(curve), np.array(Gbestcurve)
遗传算法定义完成之后,写BP网络部分,采用tensorflow2.0框架搭建网络(简单结构的网络采用tensorflow的Sequential容器搭建非常快捷,比pytorch方便很多)。
首先导入数据并做简单处理,本次网络模型的训练数据采用的是某地的降水量数据。
# Read the training and test data (precipitation dataset).
df = pd.read_excel('./train.xlsx')
df2 = pd.read_excel('./test.xlsx')
# Training-set features and target ('降水量(mm)' = precipitation in mm).
Y = df['降水量(mm)'].values
X = df.drop(columns='降水量(mm)').values
# Test-set features and target.
Y1 = df2['降水量(mm)'].values
X1 = df2.drop(columns='降水量(mm)').values
# Standardisers for target and features (fitted on the training set only).
Y_MM = StandardScaler()
X_MM = StandardScaler()
# Fit on the training data and transform it.
x_train = X_MM.fit_transform(X)
y_train = Y_MM.fit_transform(Y.reshape(-1, 1))
# Bug fix: the test set must be transformed with the scalers already fitted
# on the training set.  The original `fit_transform` refitted the scalers on
# the test data (data leakage) and made the later `inverse_transform` of the
# predictions use test-set statistics instead of the training-set ones.
x_test = X_MM.transform(X1)
y_test = Y_MM.transform(Y1.reshape(-1, 1))
数据导入完成之后定义BP网络,采用Sequential方式搭建,可以看到就几行代码,非常简单方便。
def bp(units1, units2, learning_rate, dropout, epoch, batchsize):
    '''Build and train a two-hidden-layer MLP ("BP network").

    Trains on the module-level x_train/y_train with x_test/y_test as
    validation data, then predicts on the test set and de-normalises
    both predictions and targets with the module-level Y_MM scaler.

    Returns (model, training history, predictions in original units,
    true targets in original units).
    '''
    net = Sequential()
    net.add(Dense(units1, activation='relu', input_dim=x_train.shape[1]))
    net.add(Dense(units2, activation='relu'))
    net.add(Dropout(dropout))
    net.add(Dense(y_train.shape[1]))  # linear output layer for regression
    net.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse', metrics='mae')
    history = net.fit(
        x_train, y_train,
        epochs=epoch,
        batch_size=batchsize,
        validation_data=(x_test, y_test),
        verbose=0,
    )
    # Predict on the test set and undo the target normalisation.
    predictions = net.predict(x_test)
    predictions_orig_scale = Y_MM.inverse_transform(predictions)
    targets_orig_scale = Y_MM.inverse_transform(y_test)
    return net, history, predictions_orig_scale, targets_orig_scale
定义适应度函数,采用R2作为适应度函数,由于R2越大模型越优秀,而我们上面定义的遗传算法是求解函数最小值的,因此对R2取反。
def funfit(x):
    '''Fitness function for the GA: negated test-set R^2 of a BP network.

    x packs the hyper-parameters: [units1, units2, learning_rate,
    dropout, epochs, batch_size].  Since the GA minimises and a larger
    R^2 means a better model, the R^2 is negated before returning.
    '''
    tf.random.set_seed(0)  # fixed seed so each evaluation is reproducible
    hidden1 = int(x[0])    # neurons in the first hidden layer
    hidden2 = int(x[1])    # neurons in the second hidden layer
    lr = x[2]              # learning rate
    drop = x[3]            # dropout rate
    n_epochs = int(x[4])   # training epochs
    n_batch = int(x[5])    # batch size
    _, _, y_pred_inv, y_true_inv = bp(hidden1, hidden2, lr, drop, n_epochs, n_batch)
    return -r2_score(y_true_inv, y_pred_inv)
调用遗传算法开始优化。
# Bug fix: GA is a plain function defined above (not a module, so no GA.GA),
# and the fitness function defined above is `funfit` (`fun` is undefined here).
# pop, dim, maxT, ub and lb must be defined beforehand, e.g.:
#   pop, dim, maxT = 20, 6, 50
#   ub = [64, 64, 0.01, 0.5, 200, 64]
#   lb = [4, 4, 0.0001, 0.0, 10, 4]
GbestScore, GbestPositon, Curve, GbestCurve = GA(pop, dim, maxT, ub, lb, funfit, info=True)
优化过程的适应度曲线变化情况,可以看到效果还是不错的。
优化过程中的超参数位置变化曲线。
最后是整体的预测效果展示。
完整代码获取加QQ1019312261