# 二、源代码: (Part 2: Source code)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import copy
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Load the dataset. The path is written as a raw string so the backslashes
# are taken literally: '\S' and '\d' are not valid escape sequences and a
# plain literal triggers an invalid-escape warning on recent Python versions.
# The resulting path value is unchanged.
df = pd.read_csv(r'..\Save\data.csv')

# Keep only the twelve columns the GA class uses as candidate features,
# plus 'Severity', which it uses as the prediction target.
df = df[['Amenity', 'Bump', 'Crossing', 'Junction',
         'Railway', 'Roundabout', 'Station', 'Traffic_Calming',
         'Give_Way', 'No_Exit', 'Stop', 'Traffic_Signal', 'Severity']]
class GA:
    """Genetic-algorithm feature selection scored by Gaussian naive Bayes.

    Each individual ("chromosome") is a row of 0/1 genes, one gene per
    feature column; a gene equal to 1 means the feature is used.  The fitness
    of an individual is the test-set accuracy of a ``GaussianNB`` model
    trained on the selected columns only.

    Relies on ``train_test_split``, ``GaussianNB`` and ``accuracy_score``
    imported at file level.
    """

    def __init__(self, df, pop_size=20):
        """Split the data and create a random initial population.

        Args:
            df: DataFrame containing the feature columns and a 'Severity'
                column used as the target.
            pop_size: Number of individuals in the population (default 20).

        Raises:
            ValueError: If ``pop_size`` is smaller than 2.
        """
        if pop_size < 2:
            raise ValueError('pop_size must be at least 2')

        # Work on a copy so the caller's frame is never modified, and map
        # booleans to 0/1.
        df_GA = df.copy()
        df_GA = df_GA.replace(True, 1)
        df_GA = df_GA.replace(False, 0)

        X_GA = df_GA.drop(columns='Severity')
        Y_GA = df_GA['Severity']
        (self.X_train_GA, self.X_test_GA,
         self.y_train_GA, self.y_test_GA) = train_test_split(
            X_GA, Y_GA, test_size=0.3, random_state=1)

        # One gene per feature column; the gene count follows the data
        # instead of being fixed at 12.
        self.groups = pd.DataFrame(
            np.random.randint(2, size=(pop_size, X_GA.shape[1])))

    def caculate_fitness(self, groups):
        """Return the fitness (test accuracy) of every individual in ``groups``.

        Args:
            groups: Population as a DataFrame (or 2-D array-like) of 0/1 genes.

        Returns:
            A list with one score per row of ``groups``.  An individual that
            selects no feature at all scores 0.0, because a model cannot be
            fitted on zero columns.
        """
        population = pd.DataFrame(groups)
        # Loop-invariant conversions, hoisted out of the per-individual loop.
        X_train_all = pd.DataFrame(self.X_train_GA)
        X_test_all = pd.DataFrame(self.X_test_GA)

        fitness = []
        for _, chromosome in population.iterrows():
            col_index = [idx for idx, gene in enumerate(chromosome) if gene == 1]
            if not col_index:
                # Empty feature subset: fitting would raise, so give it the
                # worst possible score instead of aborting the whole search.
                fitness.append(0.0)
                continue
            model = GaussianNB()
            model.fit(X_train_all.iloc[:, col_index], self.y_train_GA)
            y_pred_GA = model.predict(X_test_all.iloc[:, col_index])
            fitness.append(accuracy_score(self.y_test_GA, y_pred_GA))
        return fitness

    def optimize_choose(self, groups, fitness=None):
        """Return the best individual of ``groups`` and its score.

        Args:
            groups: Population of 0/1 genes.
            fitness: Optional precomputed scores for ``groups``; computed via
                ``caculate_fitness`` when omitted.

        Returns:
            Tuple ``(optimize, optimize_score)``: the best row as a Series and
            its fitness.  Ties go to the first individual with the top score.
        """
        if fitness is None:
            fitness = self.caculate_fitness(groups)
        fitness = list(fitness)
        optimize_score = max(fitness)
        optimize_idx = fitness.index(optimize_score)
        optimize = pd.DataFrame(groups).iloc[optimize_idx, :]
        return optimize, optimize_score

    def cross_variation(self, groups, variation_rate, fitness=None):
        """Produce the next generation by selection, crossover and mutation.

        The fitter half of the population survives unchanged as parents.
        Consecutive parents (in fitness order) are paired and recombined with
        a single random crossover point; each child then has one random gene
        flipped with probability ``variation_rate``.

        Args:
            groups: Current population of 0/1 genes.
            variation_rate: Per-child mutation probability.
            fitness: Optional precomputed scores for ``groups``; computed via
                ``caculate_fitness`` when omitted, so callers that already
                have the scores avoid training every model a second time.

        Returns:
            A new DataFrame with as many rows as ``groups``: parents first,
            then children, with a fresh 0..n-1 row index.

        Raises:
            ValueError: If the population has fewer than 2 individuals, has
                no genes, or ``fitness`` does not match the population size.
        """
        groups = pd.DataFrame(groups)
        pop_size, n_genes = groups.shape
        if pop_size < 2 or n_genes < 1:
            raise ValueError(
                'population needs at least 2 individuals and 1 gene')
        if fitness is None:
            fitness = self.caculate_fitness(groups)
        fitness = list(fitness)
        if len(fitness) != pop_size:
            raise ValueError('fitness length must equal the population size')

        # Selection: keep the better half, best first (stable for ties).
        fitness_sorted = sorted(enumerate(fitness), key=lambda x: x[1],
                                reverse=True)
        n_parents = pop_size // 2
        opt_choose_idx = [idx for idx, _ in fitness_sorted[:n_parents]]
        parents = groups.iloc[opt_choose_idx, :].reset_index(drop=True)

        # Crossover: recombine the *selected parents*.  Taking rows from the
        # unsorted population here would ignore the selection step.
        n_children = pop_size - n_parents
        childs = []
        pair = 0
        while len(childs) < n_children:
            # Modulo keeps the pairing valid when the parent count is odd.
            father = parents.iloc[(2 * pair) % n_parents].tolist()
            mother = parents.iloc[(2 * pair + 1) % n_parents].tolist()
            point = np.random.randint(0, n_genes)
            childs.append(father[:point] + mother[point:])
            childs.append(mother[:point] + father[point:])
            pair += 1
        childs = pd.DataFrame(childs[:n_children], columns=groups.columns)

        # Mutation: flip one randomly chosen gene per child with the given
        # probability.
        for k in range(len(childs)):
            variate_idx = np.random.randint(0, n_genes)
            if np.random.random() <= variation_rate:
                flipped = 0 if childs.iloc[k, variate_idx] == 1 else 1
                childs.iloc[k, variate_idx] = flipped

        return pd.concat([parents, childs], ignore_index=True)

    def run(self, generations=100, variation_rate=0.1):
        """Run the search and report progress on stdout.

        Args:
            generations: Number of generations to evolve (default 100).
            variation_rate: Per-child mutation probability (default 0.1).

        Returns:
            Tuple ``(optimize, score)``: the best individual of the last
            evaluated generation and the list of best scores per generation.

        Raises:
            ValueError: If ``generations`` is smaller than 1.
        """
        if generations < 1:
            raise ValueError('generations must be at least 1')

        population = copy.deepcopy(self.groups)
        score = []
        for times in range(1, generations + 1):
            # Evaluate once per generation and share the scores between the
            # best-individual lookup and the breeding step.
            fitness = self.caculate_fitness(population)
            optimize, optimize_score = self.optimize_choose(population, fitness)
            population = self.cross_variation(population, variation_rate,
                                              fitness)
            score.append(optimize_score)
            print('[Generation {}]当前最优解得分{}'.format(times, optimize_score))
        print('最优解序列为', optimize)
        return optimize, score
# Script entry: build the selector from the loaded frame (the constructor
# splits the data and draws a random initial population), then run the
# search, which prints the best score of every generation and finally the
# best gene sequence.
model_GA = GA(df)
model_GA.run()