import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from random import randint
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
def split(df, label):
    """Split features and labels into train and test partitions.

    Delegates to ``train_test_split`` with 25% of the samples held out for
    testing and ``random_state=42`` so repeated calls give the same split.

    Args:
        df: Feature table passed as the first array to ``train_test_split``.
        label: Target values passed as the second array.

    Returns:
        A 4-tuple ``(X_train, X_test, Y_train, Y_test)``.
    """
    parts = train_test_split(df, label, test_size=0.25, random_state=42)
    features_train, features_test, labels_train, labels_test = parts
    return features_train, features_test, labels_train, labels_test
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
# Mount Google Drive at /content/drive so files stored there can be accessed.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this file anywhere else.
from google.colab import drive
drive.mount('/content/drive')
# Each entry pairs a display name with the estimator it labels. The two
# module-level lists below are derived from this table so that
# ``classifiers[k]`` always names ``models[k]``.
_named_models = [
    ('LinearSVM', svm.SVC(kernel='linear')),
    ('RadialSVM', svm.SVC(kernel='rbf')),
    ('Logistic', LogisticRegression(max_iter = 1000)),
    ('RandomForest', RandomForestClassifier(n_estimators=200, random_state=0)),
    ('AdaBoost', AdaBoostClassifier(random_state = 0)),
    ('DecisionTree', DecisionTreeClassifier(random_state=0)),
    ('KNeighbors', KNeighborsClassifier()),
    ('GradientBoosting', GradientBoostingClassifier(random_state=0)),
]
classifiers = [name for name, _ in _named_models]
models = [estimator for _, estimator in _named_models]
def acc_score(df, label):
    """Rank the module-level ``models`` by test accuracy.

    The data is partitioned with ``split``; every estimator in ``models`` is
    fitted on the training part (the estimator objects themselves are fitted,
    not copies) and scored with ``accuracy_score`` on the test part.

    Args:
        df: Feature table.
        label: Target values.

    Returns:
        A DataFrame with columns ``"Classifier"`` (taken from the
        module-level ``classifiers`` list) and ``"Accuracy"``, sorted by
        accuracy in descending order with a fresh 0..n-1 index.
    """
    X_train, X_test, Y_train, Y_test = split(df, label)

    accuracies = []
    for estimator in models:
        estimator.fit(X_train, Y_train)
        accuracies.append(accuracy_score(Y_test, estimator.predict(X_test)))

    ranking = pd.DataFrame({"Classifier": classifiers})
    ranking["Accuracy"] = accuracies
    ranking = ranking.sort_values(by="Accuracy", ascending=False)
    return ranking.reset_index(drop=True)
def plot(score, x, y, c="b"):
    """Draw a point plot of accuracy per generation.

    Args:
        score: Sequence of accuracy values, one per generation, in order.
        x: Lower limit of the y-axis.
        y: Upper limit of the y-axis.
        c: Colour passed to ``sns.pointplot``; defaults to ``"b"``.

    The generation axis is numbered 1..len(score). It used to be hard-coded
    to five generations, which only worked when exactly five scores were
    supplied.
    """
    gen = list(range(1, len(score) + 1))
    plt.figure(figsize=(6, 4))
    ax = sns.pointplot(x=gen, y=score, color=c)
    ax.set(xlabel="Generation", ylabel="Accuracy")
    ax.set(ylim=(x, y))
# Initialise the population, randomly dropping 30% of the features.
def initilization_of_population(size, n_feat):
    """Create the initial population of boolean feature masks.

    Each chromosome is a boolean array of length ``n_feat`` in which
    ``int(0.3 * n_feat)`` entries are ``False`` (feature dropped) and the rest
    are ``True`` (feature kept). The positions of the dropped features are
    randomised with ``np.random.shuffle``.

    Args:
        size: Number of chromosomes to generate.
        n_feat: Number of features, i.e. the length of every chromosome.

    Returns:
        A list of ``size`` boolean NumPy arrays.
    """
    n_dropped = int(0.3 * n_feat)
    population = []
    for _ in range(size):
        # Builtin ``bool`` is used as the dtype: the ``np.bool`` alias was
        # removed in NumPy 1.24 and raises AttributeError there.
        chromosome = np.ones(n_feat, dtype=bool)  # binary encoding
        chromosome[:n_dropped] = False
        np.random.shuffle(chromosome)
        population.append(chromosome)
    return population
# Fitness function; logistic regression is suitable for binary classification.
def fitness_score(population):
    """Score every chromosome and return the population ordered best-first.

    For each boolean mask, ``logmodel`` is fitted on the masked columns of
    ``X_train`` and evaluated with ``accuracy_score`` on the same columns of
    ``X_test``. ``logmodel``, ``X_train``, ``X_test``, ``Y_train`` and
    ``Y_test`` are not parameters; they are looked up in the enclosing
    (module) scope when the function runs.

    Args:
        population: Sequence of equal-length boolean masks over the columns.

    Returns:
        ``(scores, chromosomes)``: two lists ordered by descending accuracy,
        where ``chromosomes[k]`` is the mask that obtained ``scores[k]``.
    """
    scores = []
    for mask in population:
        logmodel.fit(X_train.iloc[:, mask], Y_train)
        predicted = logmodel.predict(X_test.iloc[:, mask])
        scores.append(accuracy_score(Y_test, predicted))

    score_arr = np.array(scores)
    pop_arr = np.array(population)
    # argsort is ascending; reversing the index order puts the best first.
    best_first = np.argsort(score_arr)[::-1]
    return list(score_arr[best_first]), list(pop_arr[best_first, :])
# Survival of the fittest: keep the individuals with the highest fitness.
def selection(pop_after_fit, n_parents):
    """Return the first ``n_parents`` individuals of a ranked population.

    Args:
        pop_after_fit: Population indexable by position; the caller is
            expected to pass it already ordered best-first.
        n_parents: Number of leading individuals to keep.

    Returns:
        A new list holding ``pop_after_fit[0]`` .. ``pop_after_fit[n_parents-1]``.

    Raises:
        IndexError: If ``n_parents`` exceeds the population size.
    """
    return [pop_after_fit[rank] for rank in range(n_parents)]
# Crossover
def crossover(pop_after_sel):
    """Extend the selected parents with single-point crossover children.

    Parents are taken in pairs ``(0, 1), (2, 3), ...``. Each pair yields one
    child made of the first half of the first parent followed by the second
    half of the second parent (cut at ``len(parent) // 2``). With an odd
    number of parents (three or more) the last parent is paired with the
    element that follows it in the growing result list, which is the first
    child produced.

    Args:
        pop_after_sel: List of equal-length 1-D chromosomes (array-likes
            accepted by ``np.concatenate``).

    Returns:
        A new list containing the parents followed by the children. The input
        list is left untouched.
    """
    # Work on a shallow copy: binding the argument directly would make every
    # append below grow the caller's list as well.
    pop_nextgen = list(pop_after_sel)
    n_parents = len(pop_nextgen)
    for i in range(0, n_parents, 2):
        if i + 1 >= len(pop_nextgen):
            # A lone parent has no partner at all; nothing to cross.
            break
        first, second = pop_nextgen[i], pop_nextgen[i + 1]
        cut = len(first) // 2
        pop_nextgen.append(np.concatenate((first[:cut], second[cut:])))
    return pop_nextgen
# Mutation
def mutation(pop_after_cross, mutation_rate, n_feat):
    """Return mutated copies of every chromosome in the population.

    For each chromosome, ``int(mutation_rate * n_feat)`` positions are drawn
    with ``randint(0, n_feat - 1)`` and the value at each drawn position is
    logically negated. Positions are drawn with replacement, so a position
    drawn twice is flipped back.

    Args:
        pop_after_cross: Sequence of chromosomes; each must provide
            ``.copy()`` and item assignment (e.g. NumPy arrays or lists).
        mutation_rate: Fraction of ``n_feat`` that sets the number of draws.
        n_feat: Chromosome length; upper bound (exclusive) for positions.

    Returns:
        A new list of mutated chromosomes, in the same order as the input.
    """
    mutation_range = int(mutation_rate * n_feat)
    pop_next_gen = []
    for parent in pop_after_cross:
        # Mutate a copy: flipping bits in place would also alter the arrays
        # the caller still holds (such as the best individual it has ranked).
        chromo = parent.copy()
        for _ in range(mutation_range):
            pos = randint(0, n_feat - 1)
            chromo[pos] = not chromo[pos]
        pop_next_gen.append(chromo)
    return pop_next_gen
# Iterate over generations
def generations(df, label, size, n_feat, n_parents, mutation_rate, n_gen, X_train,
                X_test, Y_train, Y_test):
    """Run the genetic algorithm and collect the best individual per generation.

    Each generation is scored with ``fitness_score``, the top ``n_parents``
    are kept by ``selection``, children are added by ``crossover`` and the
    result is passed through ``mutation`` to form the next population.

    Args:
        df, label: Accepted for interface compatibility; not used here.
        size: Number of chromosomes in the initial population.
        n_feat: Chromosome length (number of features).
        n_parents: Number of individuals kept by ``selection``.
        mutation_rate: Passed to ``mutation``.
        n_gen: Number of generations to run.
        X_train, X_test, Y_train, Y_test: Accepted but not forwarded;
            ``fitness_score`` is called with the population only.

    Returns:
        ``(best_chromo, best_score)``: two lists of length ``n_gen`` holding
        the top-ranked chromosome of each generation and its score.
    """
    best_chromo = []
    best_score = []
    population_nextgen = initilization_of_population(size, n_feat)
    for i in range(n_gen):
        scores, pop_after_fit = fitness_score(population_nextgen)
        print('Best score in generation', i + 1, ':', scores[:1])
        # Record a copy of the winner before selection/crossover/mutation run.
        # Those steps hand the same array objects onwards, so an in-place
        # mutation would otherwise change the chromosome stored here and it
        # would no longer correspond to the recorded score.
        best_chromo.append(np.copy(pop_after_fit[0]))
        best_score.append(scores[0])
        pop_after_sel = selection(pop_after_fit, n_parents)
        pop_after_cross = crossover(pop_after_sel)
        population_nextgen = mutation(pop_after_cross, mutation_rate, n_feat)
    return best_chromo, best_score
# Genetic algorithm re-implementation
# Latest recommended article published on 2024-05-21 15:39:16