遗传算法+朴素贝叶斯Python实现

一、遗传算法流程图:

在这里插入图片描述

交叉策略的选取

在这里插入图片描述

迭代终止条件:

  1. 迭代次数达到预设值 √
  2. 适应度函数达到预设要求
  3. 群组趋于稳定(可以理解为连续m代的最优解相同,适应度不再变化)

二、源代码:

import pandas as pd 
import matplotlib.pyplot as plt
# from pyecharts import Line 
import numpy as np 
import copy 
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import accuracy_score

# Load the data set and keep only the boolean road-feature columns plus the
# 'Severity' target. Forward slashes are used because the original
# '..\Save\data.csv' literal relied on the invalid escape sequences '\S' and
# '\d' (a SyntaxWarning on recent Python versions); forward slashes resolve to
# the same file on Windows and also work on POSIX systems.
df = pd.read_csv('../Save/data.csv')
df = df[[
    'Amenity', 'Bump', 'Crossing', 'Junction',
    'Railway', 'Roundabout', 'Station', 'Traffic_Calming',
    'Give_Way', 'No_Exit', 'Stop', 'Traffic_Signal', 'Severity',
]]


class GA:
    """Genetic-algorithm feature selection scored by Gaussian naive Bayes.

    Every individual (chromosome) is a row of 0/1 flags, one flag per feature
    column of the training data; a 1 means the column is used. The fitness of
    an individual is the test-set accuracy of a ``GaussianNB`` model trained
    on the selected columns only.

    Class attributes:
        POPULATION_SIZE: number of individuals in the initial population.
        N_PARENTS: number of top-ranked individuals kept and bred each
            generation.
        N_GENERATIONS: number of generations executed by ``run``.
        MUTATION_RATE: per-child mutation probability used by ``run``.
    """

    POPULATION_SIZE = 20
    N_PARENTS = 10
    N_GENERATIONS = 100
    MUTATION_RATE = 0.1

    def __init__(self, df):
        """Split ``df`` into train/test sets and draw a random population.

        Args:
            df: DataFrame holding the feature columns and a 'Severity'
                column, which is used as the classification target.
        """
        df_GA = df.copy()
        # Encode boolean flags as integers.
        df_GA = df_GA.replace(True, 1)
        df_GA = df_GA.replace(False, 0)

        X_GA = df_GA.drop(columns='Severity')
        Y_GA = df_GA['Severity']
        (self.X_train_GA, self.X_test_GA,
         self.y_train_GA, self.y_test_GA) = train_test_split(
            X_GA, Y_GA, test_size=0.3, random_state=1)

        # One gene per feature column, so the chromosome length follows the
        # data instead of being fixed.
        n_genes = X_GA.shape[1]
        self.groups = pd.DataFrame(
            np.random.randint(2, size=(self.POPULATION_SIZE, n_genes)))

    def caculate_fitness(self, groups):
        """Return the fitness (test accuracy) of every individual.

        Args:
            groups: DataFrame with one 0/1 chromosome per row.

        Returns:
            A list of floats, one per row of ``groups``. An individual that
            selects no feature at all scores 0.0, because a classifier cannot
            be fitted on zero columns.
        """
        fitness = []
        for i in range(len(groups)):
            # Positions of the selected features.
            col_index = np.flatnonzero(groups.iloc[i, :].to_numpy() == 1)
            if len(col_index) == 0:
                fitness.append(0.0)
                continue

            X_GA_train = self.X_train_GA.iloc[:, col_index]
            X_GA_test = self.X_test_GA.iloc[:, col_index]

            model = GaussianNB()
            model.fit(X_GA_train, self.y_train_GA)
            y_pred_GA = model.predict(X_GA_test)
            fitness.append(accuracy_score(self.y_test_GA, y_pred_GA))
        return fitness

    def optimize_choose(self, groups, fitness=None):
        """Pick the best individual of a population.

        Args:
            groups: DataFrame with one chromosome per row.
            fitness: optional precomputed fitness values for ``groups``;
                computed with ``caculate_fitness`` when omitted.

        Returns:
            ``(optimize, optimize_score)``: the best row (a Series) and its
            fitness. On ties the first best row wins.
        """
        if fitness is None:
            fitness = self.caculate_fitness(groups)
        fitness = list(fitness)

        optimize_score = max(fitness)
        optimize_idx = fitness.index(optimize_score)
        optimize = groups.iloc[optimize_idx, :]
        return optimize, optimize_score

    def cross_variation(self, groups, variation_rate, fitness=None):
        """Build the next generation by selection, crossover and mutation.

        The ``N_PARENTS`` fittest individuals survive unchanged. They are
        paired off in rank order, and each pair produces two children through
        single-point crossover. Each child then has one randomly chosen gene
        flipped with probability ``variation_rate``.

        Args:
            groups: DataFrame with one chromosome per row.
            variation_rate: mutation probability per child.
            fitness: optional precomputed fitness values for ``groups``;
                computed with ``caculate_fitness`` when omitted.

        Returns:
            A new DataFrame: the parents followed by their children, with a
            fresh 0..n-1 index.
        """
        if fitness is None:
            fitness = self.caculate_fitness(groups)
        fitness = list(fitness)
        n_genes = groups.shape[1]

        # Selection: rank by fitness, best first (stable for ties).
        ranked = sorted(range(len(fitness)),
                        key=fitness.__getitem__, reverse=True)
        parents = groups.iloc[ranked[:self.N_PARENTS], :]

        # 1. Crossover. The pairs are taken from the selected parents, not
        # from the raw population, so that selection drives the offspring.
        childs = []
        for j in range(0, len(parents) - 1, 2):
            cut = np.random.randint(0, n_genes)
            father = list(parents.iloc[j, :])
            mother = list(parents.iloc[j + 1, :])
            childs.append(father[:cut] + mother[cut:])
            childs.append(mother[:cut] + father[cut:])
        # Reuse the population's column labels so the concat below aligns.
        childs = pd.DataFrame(childs, columns=groups.columns)

        # 2. Mutation: flip one random gene of a child with the given rate.
        for k in range(len(childs)):
            variate_idx = np.random.randint(0, n_genes)
            if np.random.random() <= variation_rate:
                childs.iloc[k, variate_idx] = 1 - childs.iloc[k, variate_idx]

        return pd.concat([parents, childs], ignore_index=True)

    def run(self):
        """Evolve the population for ``N_GENERATIONS`` generations.

        Prints the best score of each generation and finally the best
        chromosome of the last evaluated population.

        Returns:
            ``(optimize, score)``: that best chromosome and the list of
            per-generation best scores.
        """
        population = self.groups.copy()
        score = []
        optimize = None
        for generation in range(1, self.N_GENERATIONS + 1):
            # Evaluate once and share the result between both steps.
            fitness = self.caculate_fitness(population)
            optimize, optimize_score = self.optimize_choose(
                population, fitness=fitness)
            population = self.cross_variation(
                population, self.MUTATION_RATE, fitness=fitness)
            score.append(optimize_score)
            print('[Generation {}]当前最优解得分{}'.format(generation, optimize_score))

        print('最优解序列为', optimize)
        return optimize, score


# Script entry: build the optimiser from the loaded frame (this splits the
# data and draws the random initial population), then run the evolution loop,
# which prints the best score of every generation.
model_GA = GA(df)
model_GA.run()

踩坑记录1: 关于赋值与传参

groups(DataFrame 格式)在传参时,如果单纯使用 groups_update = groups 进行赋值,在后续的迭代中会发现 groups 变成了 list 格式,这导致后续迭代出错终止。(原因是直接赋值只是让两个变量引用同一个对象,对其中一个的修改会影响另一个。)建议使用:

import copy
groups_update = copy.deepcopy(groups)
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值