利用GA实现对SVM超参数的优化
最近在学习如何用遗传算法对SVM的超参数进行优化,参考了一些其他博主的文章,实现了这一方法。
遗传算法程序
首先定义两个函数:计算均方误差(MSE)的 msefunc,以及用给定超参数训练 SVR 并返回验证集 MSE 的 SVMResult:
def msefunc(predictval, realval):
    '''
    Compute the mean squared error between predictions and true values.

    predictval: indexable sequence of predicted values
    realval: indexable sequence of true values, read at the same positions
             as predictval

    Prints the per-sample squared errors and the resulting MSE, then
    returns the MSE.
    '''
    squared = []
    for pos, predicted in enumerate(predictval):
        diff = predicted - realval[pos]
        squared.append(diff * diff)  # squared difference for this sample
    print("Square Error: ", squared)
    mse = sum(squared) / len(squared)  # mean of the squared differences
    print("MSE = ", mse)
    return mse
def SVMResult(vardim, x, bound):
    '''
    Fit an RBF-kernel SVR with the candidate hyperparameters and score it.

    vardim: dimension of variables (not used inside this function)
    x: candidate solution; x[0] is passed as C, x[1] as epsilon and
       x[2] as gamma
    bound: boundaries of variables (not used inside this function)

    Returns the value of msefunc for the predictions on the hard-coded
    validation samples.

    NOTE(review): `svm` is presumably sklearn.svm; the import is not
    visible in this part of the file -- confirm.
    '''
    # Small hard-coded training and validation sets.
    train_features = [[0, 0], [2, 2], [1, 4], [3, 7], [3, 6]]
    train_targets = [0.5, 2.5, 3.0, 4.0, 5]
    valid_features = [[1, 1], [3, 5]]
    valid_targets = [3, 4]

    model = svm.SVR(C=x[0], epsilon=x[1], gamma=x[2], kernel='rbf')
    model.fit(train_features, train_targets)
    predictions = model.predict(valid_features)
    print("y_pred is", predictions , "y_true is" , valid_targets)

    # The validation MSE is what GAIndividual.calculateFitness consumes.
    return msefunc(predictions, valid_targets)
定义遗传算法种群中的单个个体(负责染色体的随机生成与适应度计算):
class GAIndividual:
    '''
    A single individual (one candidate solution) of the GA population.
    '''

    def __init__(self, vardim, bound):
        '''
        vardim: dimension of variables
        bound: boundaries of variables; indexed as bound[0, i] for the
               lower and bound[1, i] for the upper limit of variable i
        '''
        self.vardim = vardim
        self.bound = bound
        self.fitness = 0.

    def generate(self):
        '''
        Generate a random chromosome, each gene drawn uniformly between
        its lower and upper bound.
        '''
        n = self.vardim  # avoid shadowing the builtin `len`
        rnd = np.random.random(size=n)
        lower = self.bound[0, :n]
        upper = self.bound[1, :n]
        # Scale the unit-interval draws into [lower, upper) per gene.
        self.chrom = lower + (upper - lower) * rnd

    def calculateFitness(self):
        '''
        Calculate the fitness of the chromosome.

        SVMResult returns an error (MSE, lower is better), while the
        genetic algorithm keeps the individual with the LARGEST fitness
        and selects parents proportionally to fitness.  The error is
        therefore mapped to 1 / (1 + MSE), which lies in (0, 1] and grows
        as the error shrinks.  GeneticAlgorithm.solve reports
        (1 - fitness) / fitness, which is exactly the inverse of this
        mapping, so its trace shows the MSE again.
        '''
        mse = SVMResult(self.vardim, self.chrom, self.bound)
        self.fitness = 1. / (1. + mse)
建立描述种群演化过程的类,其中选择、交叉、变异等操作的方法可以根据需要重新定义。
class GeneticAlgorithm:
'''
The class for genetic algorithm
'''
def __init__(self, sizepop, vardim, bound, MAXGEN, params):
'''
sizepop: population sizepop
vardim: dimension of variables
bound: boundaries of variables
MAXGEN: termination condition
param: algorithm required parameters, it is a list which is consisting
of crossover rate, mutation rate, alpha
'''
self.sizepop = sizepop
self.MAXGEN = MAXGEN
self.vardim = vardim
self.bound = bound
self.population = []
self.fitness = np.zeros((self.sizepop, 1))
self.trace = np.zeros((self.MAXGEN, 2))
self.params = params
def initialize(self):
    '''
    Append sizepop freshly generated random individuals to the population.
    '''
    def spawn():
        # Build one individual and give it a random chromosome.
        member = GAIndividual(self.vardim, self.bound)
        member.generate()
        return member

    self.population.extend(spawn() for _ in range(self.sizepop))
def evaluate(self):
'''
evaluation of the population fitnesses
'''
for i in range(0, self.sizepop):
self.population[i].calculateFitness()
self.fitness[i] = self.population[i].fitness
def solve(self):
    '''
    Run the evolution: initialise and evaluate the population, then
    repeat selection, crossover, mutation and evaluation until MAXGEN
    generations have been recorded.  The best individual seen so far is
    kept in self.best; per-generation values go into self.trace.
    '''
    def record_generation():
        # Store and print the values derived from the best-so-far and
        # the average fitness for the current generation self.t.
        self.avefitness = np.mean(self.fitness)
        row = self.trace[self.t]
        row[0] = (1 - self.best.fitness) / self.best.fitness
        row[1] = (1 - self.avefitness) / self.avefitness
        print(
            "Generation %d: optimal function value is: %f; average function value is %f"
            % (self.t, row[0], row[1]))

    # Generation 0: random population.
    self.t = 0
    self.initialize()
    self.evaluate()
    champion = np.argmax(self.fitness)  # index of the largest fitness
    self.best = copy.deepcopy(self.population[champion])
    record_generation()

    for generation in range(1, self.MAXGEN):
        self.t = generation
        self.selectionOperation()
        self.crossoverOperation()
        self.mutationOperation()
        self.evaluate()  # fitness of the new population

        # Replace the stored best only when this generation beats it.
        champion = np.argmax(self.fitness)
        if np.max(self.fitness) > self.best.fitness:
            self.best = copy.deepcopy(self.population[champion])
        record_generation()

    print("Optimal function value is: %f; " % self.trace[self.t, 0])
    print("Optimal solution is:",self.best.chrom)
    self.printResult()
def selectionOperation(self):
'''
selection operation for Genetic Algorithm
'''
newpop = []
totalFitness = np.sum(self.fitness)
accuFitness = np.zeros((self.sizepop, 1))
# 适应度的累进占比
sum1 = 0.
for i in range(0, self.sizepop):
accuFitness[i] = sum1 + self.fitness[i] / totalFitness
sum1 = accuFitness[i]
# 随机选出新种群的索引
for i in range(0, self.sizepop):
r = random.random()
idx = 0
for j in range(0, self.sizepop - 1):
if j == 0 and r < accuFitness[j]:
idx = 0
break
elif r >= accuFitness[j] and r < accuFitness[j + 1]:
idx = j + 1
break
newpop.append(self.population[idx])
self.population = newpop
def crossoverOperation(self):
'''
crossover operation for genetic algorithm
'''
newpop = []
# 选出两个个体进行交换
for i