遗传算法+朴素贝叶斯Python实现

<ZYH>

已于 2023-03-13 11:22:28 修改

阅读量196

点赞数 1

分类专栏：机器学习文章标签： python 机器学习

于 2023-03-07 14:29:37 首次发布

本文链接：https://blog.csdn.net/weixin_55059461/article/details/129265679

版权

机器学习专栏收录该内容

1 篇文章 0 订阅

订阅专栏

一、遗传算法流程图：

交叉策略的选取

迭代终止条件：

迭代次数达到预设值 √
适应度函数达到预设要求
群组趋于稳定（可以理解为连续m代的最优解相同，适应度不再变化）

二、源代码：

import pandas as pd 
import matplotlib.pyplot as plt
# from pyecharts import Line 
import numpy as np 
import copy 
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import accuracy_score

# Load the data set. The path is a raw string so the Windows-style
# backslashes stay literal: '\S' and '\d' are not valid escape sequences and
# newer Python versions emit a warning for them in a normal string literal.
df = pd.read_csv(r'..\Save\data.csv')
# Keep only the columns used below: the feature columns plus the
# 'Severity' column, which the GA class uses as the prediction target.
df = df[[
    'Amenity', 'Bump', 'Crossing', 'Junction',
    'Railway', 'Roundabout', 'Station', 'Traffic_Calming',
    'Give_Way', 'No_Exit', 'Stop', 'Traffic_Signal', 'Severity',
]]


class GA:
    """Genetic-algorithm feature selection scored by Gaussian naive Bayes.

    An individual is a 0/1 mask with one gene per feature column (every
    column of ``df`` except ``'Severity'``). Its fitness is the test-set
    accuracy of a ``GaussianNB`` model trained on the columns whose gene
    is 1. Each generation keeps the fitter half of the population as
    parents and refills the other half with their mutated offspring.
    """

    def __init__(self, df, pop_size=20):
        """Split the data and create a random initial population.

        Args:
            df: DataFrame holding the feature columns and a 'Severity'
                target column. It is not modified.
            pop_size: Number of individuals in the population (default 20,
                the size the original script used). Must be at least 2.

        Raises:
            ValueError: If ``pop_size`` is smaller than 2.
        """
        if pop_size < 2:
            raise ValueError('pop_size must be at least 2')

        # replace() returns a new frame, so the caller's df stays untouched.
        df_GA = df.replace(True, 1).replace(False, 0)

        X_GA = df_GA.drop(columns='Severity')
        Y_GA = df_GA['Severity']
        (self.X_train_GA, self.X_test_GA,
         self.y_train_GA, self.y_test_GA) = train_test_split(
            X_GA, Y_GA, test_size=0.3, random_state=1)

        # Fitness depends only on the selected columns, so it is memoised
        # per feature subset (see caculate_fitness).
        self._fitness_cache = {}

        # One gene per feature column instead of a hard-coded 12.
        self.groups = pd.DataFrame(
            np.random.randint(2, size=(pop_size, X_GA.shape[1])))

    def caculate_fitness(self, groups):
        """Return the fitness (test accuracy) of every individual.

        Args:
            groups: Population as a 2-D table of 0/1 genes, one row per
                individual.

        Returns:
            A list with one accuracy value per row of ``groups``. An
            individual that selects no feature scores 0.0, because a
            classifier cannot be fitted on zero columns.
        """
        masks = np.asarray(groups)
        X_train = pd.DataFrame(self.X_train_GA)
        X_test = pd.DataFrame(self.X_test_GA)
        # setdefault keeps the method usable on instances that were built
        # without running __init__.
        cache = self.__dict__.setdefault('_fitness_cache', {})

        fitness = []
        for mask in masks:
            # Positions of the selected features.
            col_index = tuple(int(i) for i in np.flatnonzero(mask == 1))
            if col_index not in cache:
                if not col_index:
                    cache[col_index] = 0.0
                else:
                    cols = list(col_index)
                    model = GaussianNB()
                    model.fit(X_train.iloc[:, cols], self.y_train_GA)
                    y_pred_GA = model.predict(X_test.iloc[:, cols])
                    cache[col_index] = accuracy_score(self.y_test_GA,
                                                      y_pred_GA)
            fitness.append(cache[col_index])
        return fitness

    def optimize_choose(self, groups):
        """Return the fittest individual of ``groups`` and its score.

        Returns:
            A tuple ``(optimize, optimize_score)``: the row of the best
            individual (the first one on ties) and its fitness.
        """
        fitness = self.caculate_fitness(groups)
        optimize_idx = int(np.argmax(fitness))  # first index of the maximum
        optimize = pd.DataFrame(groups).iloc[optimize_idx, :]
        return optimize, fitness[optimize_idx]

    def cross_variation(self, groups, variation_rate):
        """Produce the next generation by selection, crossover and mutation.

        The fitter half of ``groups`` survives unchanged as parents.
        Consecutive parents are paired, and each pair yields two children
        by single-point crossover. Each child then flips one randomly
        chosen gene with probability ``variation_rate``.

        Args:
            groups: Current population (2-D table of 0/1 genes). It is not
                modified.
            variation_rate: Per-child mutation probability.

        Returns:
            A DataFrame of the same shape as ``groups``: the parents
            (best first) followed by the children, with a fresh row index.

        Raises:
            ValueError: If ``groups`` has fewer than 2 individuals.
        """
        groups = pd.DataFrame(groups)
        pop_size, n_genes = groups.shape
        if pop_size < 2:
            raise ValueError('groups must contain at least 2 individuals')
        n_parents = pop_size // 2
        n_childs = pop_size - n_parents

        # Rank individuals by fitness, best first (stable on ties).
        fitness = self.caculate_fitness(groups)
        ranked = sorted(range(pop_size), key=fitness.__getitem__,
                        reverse=True)
        parents = groups.iloc[ranked[:n_parents], :]
        # Breed from the SELECTED parents. The previous implementation
        # crossed the first rows of the unsorted input population instead.
        parent_genes = parents.to_numpy()

        # 1. Crossover
        childs = []
        pair = 0
        while len(childs) < n_childs:
            # Wrap around so an odd number of parents still forms pairs.
            father = parent_genes[(2 * pair) % n_parents]
            mother = parent_genes[(2 * pair + 1) % n_parents]
            idx = np.random.randint(0, n_genes)  # crossover point
            childs.append(np.concatenate([father[:idx], mother[idx:]]))
            childs.append(np.concatenate([mother[:idx], father[idx:]]))
            pair += 1
        childs = np.array(childs[:n_childs])

        # 2. Mutation
        for k in range(n_childs):
            variate_idx = np.random.randint(0, n_genes)  # mutation point
            if np.random.random() <= variation_rate:
                childs[k, variate_idx] = 0 if childs[k, variate_idx] == 1 else 1

        childs = pd.DataFrame(childs, columns=groups.columns)
        return pd.concat([parents, childs], ignore_index=True)

    def run(self, generations=100, variation_rate=0.1):
        """Evolve the population and report the best individual found.

        Prints the best score of every generation and, at the end, the
        best individual of the last evaluated generation.

        Args:
            generations: Number of generations to run (default 100).
            variation_rate: Mutation probability passed to
                ``cross_variation`` (default 0.1).

        Returns:
            A tuple ``(optimize, score)``: the best individual of the last
            evaluated generation (``None`` if ``generations`` is 0) and the
            list of best scores, one per generation.
        """
        # cross_variation never modifies its input, so self.groups (the
        # initial population) is left intact without an explicit copy.
        groups = self.groups
        optimize = None
        score = []
        for times in range(1, generations + 1):
            optimize, optimize_score = self.optimize_choose(groups)
            groups = self.cross_variation(groups, variation_rate)
            score.append(optimize_score)
            print('[Generation {}]当前最优解得分{}'.format(times, optimize_score))

        print('最优解序列为', optimize)
        return optimize, score


# Build the GA on the loaded DataFrame; the constructor splits the data into
# train/test sets and creates the random initial population.
model_GA = GA(df) 
# Run the evolution loop; it prints the best score of every generation and
# finally the best feature mask.
model_GA.run()

踩坑记录1: 关于赋值与传参

groups（DataFrame）在传参时，如果只是用 groups_update = groups 进行赋值，两个变量名会指向同一个对象（赋值并不会复制数据），因此对 groups_update 的原地修改会同时改变 groups。我在后续迭代中发现 groups 变成了 list 格式，导致迭代出错终止。如果需要一份互不干扰的独立副本，建议使用：
import copy
groups_update = copy.deepcopy(groups)