import numpy as np
from copy import deepcopy
import utils
import keras.backend
from keras.models import Model, Sequential
from keras.layers import Input, Add, Dense, Dropout, Flatten
from keras.layers import Activation, Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras import regularizers
from keras.optimizers import Adam, Nadam
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
import os
import tensorflow as tf
# Hide Tensorflow INFOS and WARNINGS
# NOTE(review): TensorFlow reads this variable at import time, and the imports
# above already ran -- confirm whether it should be set before them instead.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


class Particle:
    """One candidate CNN architecture (a PSO particle).

    The architecture is stored in ``self.layers`` as a list of dicts with the
    keys ``"type"`` (``"conv"``, ``"max_pool"``, ``"avg_pool"`` or ``"fc"``),
    ``"ou_c"`` and ``"kernel"``.  The class can randomly initialise such a
    list, update it from a velocity, and build/train the matching Keras model.
    """

    def __init__(self, min_layer, max_layer, max_pool_layers, input_width, input_height, input_channels,
                 conv_prob, pool_prob, fc_prob, max_conv_kernel, max_out_ch, max_fc_neurons, output_dim):
        """Store the search-space limits and build a random architecture.

        The depth is drawn with ``np.random.randint(min_layer, max_layer)``,
        the layer list is filled by :meth:`initialization`, the velocity is
        set to "keep" for every layer, and ``pBest`` starts as a deep copy of
        the freshly built particle.
        """
        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels

        self.num_pool_layers = 0
        self.max_pool_layers = max_pool_layers

        self.feature_width = input_width
        self.feature_height = input_height

        self.depth = np.random.randint(min_layer, max_layer)
        self.conv_prob = conv_prob
        self.pool_prob = pool_prob
        self.fc_prob = fc_prob
        self.max_conv_kernel = max_conv_kernel
        self.max_out_ch = max_out_ch
        self.max_fc_neurons = max_fc_neurons
        self.output_dim = output_dim

        self.layers = []
        self.acc = None
        self.vel = []  # Initial velocity
        self.pBest = []

        # Build particle architecture
        self.initialization()

        # Update initial velocity: every layer is kept as-is at the start
        for layer in self.layers:
            if layer["type"] != "fc":
                self.vel.append({"type": "keep"})
            else:
                self.vel.append({"type": "keep_fc"})

        self.model = None
        self.pBest = deepcopy(self)

    def __str__(self):
        """Return the layer types in order, each followed by ``" | "``."""
        return "".join(layer["type"] + " | " for layer in self.layers)

    def initialization(self):
        """Fill ``self.layers`` with a random architecture of ``self.depth`` layers.

        The first layer is always a convolution and the last one is always
        overwritten with a fully connected layer of ``self.output_dim``
        neurons.  Once a fully connected layer has been added, only further
        fully connected layers are added.
        """
        out_channel = np.random.randint(3, self.max_out_ch)
        conv_kernel = np.random.randint(3, self.max_conv_kernel)

        # First layer is always a convolution layer
        self.layers.append({"type": "conv", "ou_c": out_channel, "kernel": conv_kernel})

        # Cumulative thresholds on a uniform draw in [0, 1)
        conv_prob = self.conv_prob
        pool_prob = conv_prob + self.pool_prob
        fc_prob = pool_prob

        for _ in range(1, self.depth):
            if self.layers[-1]["type"] == "fc":
                # 1.1 is above every threshold, which forces the fc branch
                layer_type = 1.1
            else:
                layer_type = np.random.rand()

            if layer_type < conv_prob:
                self.layers = utils.add_conv(self.layers, self.max_out_ch, self.max_conv_kernel)
            elif layer_type >= conv_prob and layer_type <= pool_prob:
                self.layers, self.num_pool_layers = utils.add_pool(
                    self.layers, self.fc_prob, self.num_pool_layers, self.max_pool_layers,
                    self.max_out_ch, self.max_conv_kernel, self.max_fc_neurons, self.output_dim)
            elif layer_type >= fc_prob:
                self.layers = utils.add_fc(self.layers, self.max_fc_neurons)

        self.layers[-1] = {"type": "fc", "ou_c": self.output_dim, "kernel": -1}

    def velocity(self, gBest, Cg):
        """Recompute ``self.vel`` from ``gBest``, this particle's pBest layers and its own layers."""
        self.vel = utils.computeVelocity(gBest, self.pBest.layers, self.layers, Cg)

    def update(self):
        """Apply the current velocity, validate the result and drop any built model."""
        new_p = utils.updateParticle(self.layers, self.vel)
        new_p = self.validate(new_p)

        self.layers = new_p
        self.model = None

    def validate(self, list_layers):
        """Return a cleaned copy of ``list_layers``.

        ``list_layers`` is modified in place: its last entry is replaced by
        the output fc layer and excess pooling layers are marked ``"remove"``.
        The returned list holds the surviving conv/fc/pool layers, with
        pooling layers normalised to ``ou_c=-1`` and ``kernel=2``.  Entries of
        any other type are dropped.
        """
        # Last layer should always be a fc with number of neurons equal to the number of outputs
        list_layers[-1] = {"type": "fc", "ou_c": self.output_dim, "kernel": -1}

        # Remove excess of Pooling layers
        self.num_pool_layers = 0
        for layer in list_layers:
            if layer["type"] == "max_pool" or layer["type"] == "avg_pool":
                self.num_pool_layers += 1

                if self.num_pool_layers >= self.max_pool_layers:
                    layer["type"] = "remove"

        # Now, fix the inputs of each conv and pool layers
        updated_list_layers = []
        for layer in list_layers:
            layer_type = layer["type"]
            if layer_type == "conv":
                updated_list_layers.append({"type": "conv", "ou_c": layer["ou_c"], "kernel": layer["kernel"]})
            elif layer_type == "fc":
                updated_list_layers.append(layer)
            elif layer_type == "max_pool":
                updated_list_layers.append({"type": "max_pool", "ou_c": -1, "kernel": 2})
            elif layer_type == "avg_pool":
                updated_list_layers.append({"type": "avg_pool", "ou_c": -1, "kernel": 2})

        return updated_list_layers

    ##### Model methods ####
    def model_compile(self, dropout_rate):
        """Build ``self.model`` (a Keras ``Sequential``) from ``self.layers`` and compile it.

        Conv layers are followed by batch normalisation and ReLU; every conv
        after the first and every fc layer is preceded by dropout.  Pooling
        layers always use a 3x3 window with stride 2, whatever the layer's
        ``"kernel"`` entry says.  The last fc layer ends in softmax.
        """
        list_layers = self.layers
        self.model = Sequential()

        for i, layer in enumerate(list_layers):
            layer_type = layer["type"]

            if layer_type == "conv":
                n_out_filters = layer["ou_c"]
                kernel_size = layer["kernel"]

                if i == 0:
                    in_w = self.input_width
                    in_h = self.input_height
                    in_c = self.input_channels

                    self.model.add(Conv2D(n_out_filters, kernel_size, strides=(1, 1), padding="same", data_format="channels_last", kernel_initializer='he_normal', bias_initializer='he_normal', activation=None, input_shape=(in_w, in_h, in_c)))
                    self.model.add(BatchNormalization())
                    self.model.add(Activation("relu"))
                else:
                    self.model.add(Dropout(dropout_rate))
                    self.model.add(Conv2D(n_out_filters, kernel_size, strides=(1, 1), padding="same", kernel_initializer='he_normal', bias_initializer='he_normal', activation=None))
                    self.model.add(BatchNormalization())
                    self.model.add(Activation("relu"))

            elif layer_type == "max_pool":
                self.model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

            elif layer_type == "avg_pool":
                self.model.add(AveragePooling2D(pool_size=(3, 3), strides=2))

            elif layer_type == "fc":
                # Flatten only on the transition from feature maps to dense layers
                if list_layers[i - 1]["type"] != "fc":
                    self.model.add(Flatten())

                self.model.add(Dropout(dropout_rate))

                if i == len(list_layers) - 1:
                    self.model.add(Dense(layer["ou_c"], kernel_initializer='he_normal', bias_initializer='he_normal', activation=None))
                    self.model.add(BatchNormalization())
                    self.model.add(Activation("softmax"))
                else:
                    self.model.add(Dense(layer["ou_c"], kernel_initializer='he_normal', bias_initializer='he_normal', kernel_regularizer=regularizers.l2(0.01), activation=None))
                    self.model.add(BatchNormalization())
                    self.model.add(Activation("relu"))

        adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0)
        self.model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])

    def model_fit(self, x_train, y_train, batch_size, epochs):
        """Train ``self.model`` on the given data and return the Keras history object."""
        # TODO: add option to only use a sample size of the dataset
        hist = self.model.fit(x=x_train, y=y_train, validation_split=0.0, batch_size=batch_size, epochs=epochs)
        return hist

    def model_fit_complete(self, x_train, y_train, batch_size, epochs):
        """Train ``self.model`` on the full data set; currently identical to :meth:`model_fit`."""
        return self.model_fit(x_train, y_train, batch_size, epochs)

    def model_delete(self):
        """Drop the Keras model and clear the backend session."""
        # This is used to free up memory during PSO training
        del self.model
        keras.backend.clear_session()
        self.model = None
# ---- Main function (entry-point script) ----
import keras
from psoCNN import psoCNN
import numpy as np
import time
import keras.backend
import tensorflow as tf
import os
import matplotlib
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Entry point: runs the PSO architecture search `number_runs` times, fully
    # trains and evaluates each run's gBest, and stores plots, metrics and the
    # best model under ./results/<dataset>/.

    ######## Algorithm parameters ##################
    # Other dataset names listed by the original author:
    #   "mnist", "mnist-rotated-digits", "mnist-rotated-with-background",
    #   "rectangles", "rectangles-images", "fashion-mnist",
    #   "mnist-random-background", "mnist-background-images"
    dataset = "convex"

    number_runs = 10
    number_iterations = 10
    population_size = 20

    batch_size_pso = 32
    batch_size_full_training = 32

    epochs_pso = 1
    epochs_full_training = 100

    max_conv_output_channels = 256
    max_fully_connected_neurons = 300

    min_layer = 3
    max_layer = 20

    # Probability of each layer type (should sum to 1)
    probability_convolution = 0.6
    probability_pooling = 0.3
    probability_fully_connected = 0.1

    max_conv_kernel_size = 7

    Cg = 0.5
    dropout = 0.5

    ########### Run the algorithm ######################
    results_path = "./results/" + dataset + "/"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(results_path, exist_ok=True)

    # Select the non-interactive backend once instead of on every run
    matplotlib.use('Agg')

    all_gBest_metrics = np.zeros((number_runs, 2))
    runs_time = []
    all_gbest_par = []
    best_gBest_acc = 0

    for i in range(number_runs):
        print("Run number: " + str(i))
        start_time = time.time()

        pso = psoCNN(dataset=dataset, n_iter=number_iterations, pop_size=population_size,
                     batch_size=batch_size_pso, epochs=epochs_pso, min_layer=min_layer, max_layer=max_layer,
                     conv_prob=probability_convolution, pool_prob=probability_pooling,
                     fc_prob=probability_fully_connected, max_conv_kernel=max_conv_kernel_size,
                     max_out_ch=max_conv_output_channels, max_fc_neurons=max_fully_connected_neurons,
                     dropout_rate=dropout)

        pso.fit(Cg=Cg, dropout_rate=dropout)

        print(pso.gBest_acc)

        # Plot current gBest
        plt.plot(pso.gBest_acc)
        plt.xlabel("Iteration")
        plt.ylabel("gBest acc")
        plt.savefig(results_path + "gBest-iter-" + str(i) + ".png")
        plt.close()

        print('gBest architecture: ')
        print(pso.gBest)

        # NOTE(review): "gBest_inter_" (vs "gBest_iter_" below) looks like a typo,
        # but the file name is kept so existing result readers keep working.
        np.save(results_path + "gBest_inter_" + str(i) + "_acc_history.npy", pso.gBest_acc)
        np.save(results_path + "gBest_iter_" + str(i) + "_test_acc_history.npy", pso.gBest_test_acc)

        # The timed section covers the search only, not the full training below
        end_time = time.time()
        running_time = end_time - start_time
        runs_time.append(running_time)

        # Fully train the gBest model found
        n_parameters = pso.fit_gBest(batch_size=batch_size_full_training, epochs=epochs_full_training, dropout_rate=dropout)
        all_gbest_par.append(n_parameters)

        # Evaluate the fully trained gBest model
        gBest_metrics = pso.evaluate_gBest(batch_size=batch_size_full_training)

        if gBest_metrics[1] >= best_gBest_acc:
            best_gBest_acc = gBest_metrics[1]

            # Save best gBest model
            best_gBest_yaml = pso.gBest.model.to_yaml()
            with open(results_path + "best-gBest-model.yaml", "w") as yaml_file:
                yaml_file.write(best_gBest_yaml)

            # Save best gBest model weights to HDF5 file
            pso.gBest.model.save_weights(results_path + "best-gBest-weights.h5")

        all_gBest_metrics[i, 0] = gBest_metrics[0]
        all_gBest_metrics[i, 1] = gBest_metrics[1]

        print("This run took: " + str(running_time) + " seconds.")

    # Compute mean accuracy of all runs
    all_gBest_mean_metrics = np.mean(all_gBest_metrics, axis=0)

    np.save(results_path + "/time_to_run.npy", runs_time)

    # Save all gBest metrics
    np.save(results_path + "/all_gBest_metrics.npy", all_gBest_metrics)

    # Save results in a text file
    output_str = "All gBest number of parameters: " + str(all_gbest_par) + "\n"
    output_str = output_str + "All gBest test accuracies: " + str(all_gBest_metrics[:, 1]) + "\n"
    output_str = output_str + "All running times: " + str(runs_time) + "\n"
    output_str = output_str + "Mean loss of all runs: " + str(all_gBest_mean_metrics[0]) + "\n"
    output_str = output_str + "Mean accuracy of all runs: " + str(all_gBest_mean_metrics[1]) + "\n"

    print(output_str)

    # The former `except SyntaxError` fallback could never run: a SyntaxError is
    # raised when the file is compiled, not while the `try` body executes.
    with open(results_path + "/final_results.txt", "w") as f:
        print(output_str, file=f)
# ---- Training function (module psoCNN) ----
import keras
from keras.datasets import mnist
from keras.datasets import fashion_mnist
from keras.datasets import cifar10
import keras.backend
from population import Population
import numpy as np
from copy import deepcopy
class psoCNN:
    """Particle-swarm search over CNN architectures.

    Holds the data set, a :class:`Population` of particles and the best
    particle found so far (``gBest``), together with the per-iteration history
    of its training accuracy (``gBest_acc``) and test accuracy
    (``gBest_test_acc``).
    """

    def __init__(self, dataset, n_iter, pop_size, batch_size, epochs, min_layer, max_layer,
                 conv_prob, pool_prob, fc_prob, max_conv_kernel, max_out_ch, max_fc_neurons, dropout_rate):
        """Create the population and pick the initial gBest.

        Every particle is compiled and trained for ``epochs`` epochs; the one
        with the highest last-epoch training accuracy (ties go to the later
        particle) becomes ``gBest`` and fills slot 0 of the history arrays.
        """
        self.pop_size = pop_size
        self.n_iter = n_iter
        self.epochs = epochs

        self.batch_size = batch_size
        self.gBest_acc = np.zeros(n_iter)
        self.gBest_test_acc = np.zeros(n_iter)

        # ... (dataset-specific loading and preprocessing is omitted here) ...
        # NOTE(review): the omitted code has to define self.x_train, self.y_train,
        # self.x_test, self.y_test and the locals input_width, input_height,
        # input_channels and output_dim used below.

        # Reshape the features into 4-D tensors
        self.x_train = self.x_train.reshape(self.x_train.shape[0], self.x_train.shape[1], self.x_train.shape[2], input_channels)
        self.x_test = self.x_test.reshape(self.x_test.shape[0], self.x_test.shape[1], self.x_test.shape[2], input_channels)

        # One-hot encode the labels
        self.y_train = keras.utils.to_categorical(self.y_train, output_dim)
        self.y_test = keras.utils.to_categorical(self.y_test, output_dim)

        print("Initializing population...")
        self.population = Population(pop_size, min_layer, max_layer, input_width, input_height, input_channels, conv_prob, pool_prob, fc_prob, max_conv_kernel, max_out_ch, max_fc_neurons, output_dim)

        print("Verifying accuracy of the current gBest...")
        print(self.population.particle[0])
        self.gBest = deepcopy(self.population.particle[0])
        self.gBest.model_compile(dropout_rate)
        hist = self.gBest.model_fit(self.x_train, self.y_train, batch_size=batch_size, epochs=epochs)
        test_metrics = self.gBest.model.evaluate(x=self.x_test, y=self.y_test, batch_size=batch_size)
        self.gBest.model_delete()

        self.gBest_acc[0] = hist.history['accuracy'][-1]
        self.gBest_test_acc[0] = test_metrics[1]

        self.population.particle[0].acc = hist.history['accuracy'][-1]
        self.population.particle[0].pBest.acc = hist.history['accuracy'][-1]

        print("Current gBest acc: " + str(self.gBest_acc[0]) + "\n")
        print("Current gBest test acc: " + str(self.gBest_test_acc[0]) + "\n")

        # Loop over the remaining particles looking for a better gBest
        print("Looking for a new gBest in the population...")
        for i in range(1, self.pop_size):
            particle = self.population.particle[i]

            print('Initialization - Particle: ' + str(i + 1))
            print(particle)

            particle.model_compile(dropout_rate)
            hist = particle.model_fit(self.x_train, self.y_train, batch_size=batch_size, epochs=epochs)
            particle.model_delete()

            particle.acc = hist.history['accuracy'][-1]
            particle.pBest.acc = hist.history['accuracy'][-1]

            if particle.pBest.acc >= self.gBest_acc[0]:
                print("Found a new gBest.")
                self.gBest = deepcopy(particle)
                self.gBest_acc[0] = particle.pBest.acc
                print("New gBest acc: " + str(self.gBest_acc[0]))

                # The particle's trained model was deleted above, so the copy is
                # rebuilt and trained again before being scored on the test set
                # (same sequence fit() uses); evaluating right after compile
                # would measure untrained weights.
                self.gBest.model_compile(dropout_rate)
                self.gBest.model_fit(self.x_train, self.y_train, batch_size=batch_size, epochs=epochs)
                test_metrics = self.gBest.model.evaluate(x=self.x_test, y=self.y_test, batch_size=batch_size)
                self.gBest_test_acc[0] = test_metrics[1]
                print("New gBest test acc: " + str(self.gBest_test_acc[0]))

                self.gBest.model_delete()

    # Training function
    def fit(self, Cg, dropout_rate):
        """Run iterations ``1 .. n_iter-1`` of the swarm search.

        For each particle: update velocity and architecture, train it, and
        update its pBest and the global gBest when the last-epoch training
        accuracy is at least as good.  A new gBest is retrained to obtain its
        test accuracy.  Results are written to ``gBest_acc[i]`` and
        ``gBest_test_acc[i]``.
        """
        for i in range(1, self.n_iter):
            gBest_acc = self.gBest_acc[i - 1]
            gBest_test_acc = self.gBest_test_acc[i - 1]

            for j in range(self.pop_size):
                particle = self.population.particle[j]

                print('Iteration: ' + str(i) + ' - Particle: ' + str(j + 1))

                # Update particle velocity
                particle.velocity(self.gBest.layers, Cg)

                # Update particle architecture
                particle.update()

                print('Particle NEW architecture: ')
                print(particle)

                # Compute the acc in the updated particle
                particle.model_compile(dropout_rate)
                hist = particle.model_fit(self.x_train, self.y_train, batch_size=self.batch_size, epochs=self.epochs)
                particle.model_delete()

                particle.acc = hist.history['accuracy'][-1]

                f_test = particle.acc
                pBest_acc = particle.pBest.acc

                if f_test >= pBest_acc:
                    print("Found a new pBest.")
                    print("Current acc: " + str(f_test))
                    print("Past pBest acc: " + str(pBest_acc))
                    pBest_acc = f_test
                    self.population.particle[j].pBest = deepcopy(particle)

                    if pBest_acc >= gBest_acc:
                        print("Found a new gBest.")
                        gBest_acc = pBest_acc
                        self.gBest = deepcopy(particle)

                        self.gBest.model_compile(dropout_rate)
                        hist = self.gBest.model_fit(self.x_train, self.y_train, batch_size=self.batch_size, epochs=self.epochs)
                        test_metrics = self.gBest.model.evaluate(x=self.x_test, y=self.y_test, batch_size=self.batch_size)
                        self.gBest.model_delete()
                        gBest_test_acc = test_metrics[1]

            self.gBest_acc[i] = gBest_acc
            self.gBest_test_acc[i] = gBest_test_acc

            print("Current gBest acc: " + str(self.gBest_acc[i]))
            print("Current gBest test acc: " + str(self.gBest_test_acc[i]))

    def fit_gBest(self, batch_size, epochs, dropout_rate):
        """Rebuild and fully train the gBest model; return its number of trainable parameters."""
        print("\nFurther training gBest model...")
        self.gBest.model_compile(dropout_rate)

        trainable_count = 0
        for weight in self.gBest.model.trainable_weights:
            trainable_count += keras.backend.count_params(weight)

        print("gBest's number of trainable parameters: " + str(trainable_count))
        self.gBest.model_fit_complete(self.x_train, self.y_train, batch_size=batch_size, epochs=epochs)

        return trainable_count

    def evaluate_gBest(self, batch_size):
        """Evaluate the current gBest model on the test set and return the Keras metrics.

        Index 0 of the result is reported as the loss and index 1 as the
        accuracy.  The gBest model must already exist (for example after
        :meth:`fit_gBest`).
        """
        print("\nEvaluating gBest model on the test set...")

        metrics = self.gBest.model.evaluate(x=self.x_test, y=self.y_test, batch_size=batch_size)

        print("\ngBest model loss in the test set: " + str(metrics[0]) + " - Test set accuracy: " + str(metrics[1]))
        return metrics
# ---- Particle swarm (population) definition (module population) ----
from particle import Particle
class Population:
    """A swarm of :class:`Particle` objects, stored in ``self.particle``."""

    def __init__(self, pop_size, min_layer, max_layer, input_width, input_height, input_channels, conv_prob, pool_prob, fc_prob, max_conv_kernel, max_out_ch, max_fc_neurons, output_dim):
        """Create ``pop_size`` particles that share the same search-space limits.

        The pooling-layer limit handed to every particle is derived from
        ``input_width`` only; ``input_height`` is just passed through.
        """
        # Compute maximum number of pooling layers for any given particle
        max_pool_layers = self._max_pool_layers(input_width)

        self.particle = [
            Particle(min_layer, max_layer, max_pool_layers, input_width, input_height, input_channels, conv_prob, pool_prob, fc_prob, max_conv_kernel, max_out_ch, max_fc_neurons, output_dim)
            for _ in range(pop_size)
        ]

    @staticmethod
    def _max_pool_layers(input_width):
        """Count how many times ``input_width`` can be halved while it is still above 4."""
        count = 0
        in_w = input_width
        while in_w > 4:
            count += 1
            in_w = in_w / 2  # true division, as in the original computation
        return count