函数主体部分
#define CL_TARGET_OPENCL_VERSION 120
#include <malloc.h>
#include <time.h>
#include <CL/cl.h>
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <stdlib.h>
#include <iostream>
using namespace std;
// Reads the file `filename` into `s` (defined further down in this file).
// Returns 0 on success and 1 when the file cannot be opened.
int convertToString(const char *filename, std::string& s);
// File-scope source pointer, initialised to NULL. It is not referenced by any
// code visible in this file.
const char* programSource=NULL;
// Debug helper: prints `matrix_name`, then the dim1 x dim2 int matrix `m`
// (indexed as m[i*dim2+j]) one row per line.
void show_matrix(int* m,int dim1,int dim2,char* matrix_name);
// Debug helper: same as show_matrix but for floats, printed with three decimals.
void show_matrix_float(float* m,int dim1,int dim2,char* matrix_name);
/*
 * Sorts arr[0..size-1] in place into descending order (largest value first)
 * using bubble sort.
 *
 * arr  - array of floats to sort; modified in place.
 * size - number of elements in arr; a size of 0 or 1 leaves arr untouched.
 */
void bubblesort(float *arr, int size)
{
    for (int pass = 0; pass < size - 1; pass++) {
        bool swapped = false;
        for (int j = 0; j < size - pass - 1; j++) {
            if (arr[j] < arr[j + 1]) {
                /* The temporary has to be a float: an int temporary would
                   truncate the fractional part of every swapped element. */
                float tmp = arr[j];
                arr[j] = arr[j + 1];
                arr[j + 1] = tmp;
                swapped = true;
            }
        }
        /* A full pass without a swap means the array is already ordered. */
        if (!swapped)
            break;
    }
}
/*
 * Returns the largest value in arr[0..size-1].
 *
 * The array is only read: a single linear scan is used, so the caller's data
 * keeps its order (no hidden in-place sort).
 *
 * arr  - values to inspect.
 * size - number of elements in arr.
 *
 * Returns 0.0f when arr is NULL or size <= 0, because there is no element to
 * report in that case.
 */
float max(float *arr,int size)
{
    if (arr == NULL || size <= 0)
        return 0.0f;

    float best = arr[0];
    for (int i = 1; i < size; i++) {
        if (arr[i] > best)
            best = arr[i];
    }
    return best;
}
/*
 * Randomly rewrites entries of a mutation vector.
 *
 * For each of the `size` entries a value from {0.00, 0.01, ..., 0.99} is
 * drawn with rand(); when it is below `pm` the entry is replaced by a random
 * value from {-1, 0, 1}. Entries that are not selected keep their old value.
 *
 * arr  - vector to update in place.
 * size - number of entries in arr.
 * pm   - per-entry replacement probability threshold.
 */
void mut_vec(int *arr,int size,float pm)
{
    int idx = 0;
    while (idx < size) {
        const float roll = rand() % 100 * 1.0 / 100;
        if (roll < pm)
            arr[idx] = rand() % 3 - 1;
        ++idx;
    }
}
/*
 * Fills arr[0..size-1] with random values drawn from {0.0, 0.1, ..., 0.9},
 * using one rand() call per element.
 *
 * arr  - destination array, overwritten in place.
 * size - number of elements to write.
 */
void get_cross_rates(float *arr,int size)
{
    float *slot = arr;
    for (int remaining = size; remaining > 0; --remaining)
        *slot++ = rand() % 10 * 1.0 / 10;
}
/*
 * Returns a newly allocated copy of the first individual whose fitness equals
 * best_fitness.
 *
 * population   - pop_num individuals stored back to back, code_len ints each.
 * pop_num      - number of individuals in population / entries in fitness.
 * code_len     - number of ints per individual.
 * fitness      - fitness[i] is the fitness of individual i.
 * best_fitness - fitness value to look for (compared with ==).
 *
 * Returns a malloc'd array of code_len ints that the caller must free(), or
 * NULL when no individual has that fitness or the allocation fails.
 */
int* get_best_individual(int* population,int pop_num,int code_len,float* fitness,float best_fitness)
{
    /* Find the match first so that nothing is allocated when there is none. */
    int found = -1;
    for (int i = 0; i < pop_num; i++) {
        if (fitness[i] == best_fitness) {
            found = i;
            break;
        }
    }
    if (found < 0 || code_len <= 0)
        return NULL;

    int* best_indi = (int*)malloc(sizeof(int) * code_len);
    if (best_indi == NULL)
        return NULL;

    for (int j = 0; j < code_len; j++)
        best_indi[j] = population[found * code_len + j];
    return best_indi;
}
int main() {
int *population = NULL;
int *population_original=NULL;
int *p1s = NULL;
int *p2s = NULL;
int *children1=NULL;
int *children2=NULL;
int *population_extend=NULL;
int *mutation_vector=NULL;
float *fitness=NULL;
float* fitness_sorted;
const int pop_num=5000;
const int p_num=pop_num/2;
const int pop_ex_num=2*pop_num;
const int code_len=20;
const float pc=0.6;
const float pm=0.1;
const int evol_num=100;
float *cross_rates=NULL;
int *best_p=NULL;
const int size_pop = pop_num*code_len;
const int size_pop_ex=2*size_pop;
const int size_p=pop_num/2*code_len;
population = (int*)malloc(sizeof(int)*size_pop);
population_original = (int*)malloc(sizeof(int)*size_pop);
population_extend = (int*)malloc(sizeof(int)*size_pop_ex);
p1s = (int*)malloc(sizeof(int)*size_p);
p2s = (int*)malloc(sizeof(int)*size_p);
children1=(int*)malloc(sizeof(int)*size_p);
children2=(int*)malloc(sizeof(int)*size_p);
best_p=(int*)malloc(sizeof(int)*code_len);
mutation_vector=(int*)malloc(sizeof(int)*code_len);
cross_rates=(float*)malloc(sizeof(float)*code_len);
fitness=(float*)malloc(sizeof(float)*(pop_num+p_num*2));
fitness_sorted=(float*)malloc(sizeof(float)*pop_num*2);
srand((unsigned)time(NULL));
int i;
for (i = 0; i < size_pop; i++)
{
population[i] = rand()%10;
population_original[i]=population[i];
}
p1s=population;
p2s=population+size_pop/2;
for (i = 0; i < code_len; i++)
best_p[i] = rand()%10;
printf("finish initiate!!!\n");
cl_int status;
cl_uint numPlatforms = 0;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
cl_platform_id *platforms = NULL;
platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
cl_uint numDevices = 0;
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
cl_device_id *devices;
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
cl_context context;
context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);
cl_command_queue cmdQueue;
cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status);
cl_mem buf_population;
cl_mem buf_p1s;
cl_mem buf_p2s;
cl_mem buf_population_extend;
cl_mem buf_children1;
cl_mem buf_children2;
cl_mem buf_cross_rates;
cl_mem buf_best_p;
cl_mem buf_fitness;
cl_mem buf_fitness_sorted;
cl_mem buf_mutation_vector;
buf_population = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size_pop,NULL, &status);
buf_p1s = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size_pop/2,NULL, &status);
buf_p2s = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size_pop/2,NULL, &status);
buf_population_extend = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size_pop_ex,NULL, &status);
buf_children1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size_pop/2,NULL, &status);
buf_children2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size_pop/2,NULL, &status);
buf_fitness = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*pop_num*2,NULL, &status);
buf_fitness_sorted = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*pop_num*2,NULL, &status);
buf_mutation_vector = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*code_len,NULL, &status);
buf_best_p = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*code_len,NULL, &status);
buf_cross_rates = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*code_len,NULL, &status);
status = clEnqueueWriteBuffer(cmdQueue, buf_population, CL_FALSE,0, sizeof(int)*size_pop, population, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, buf_p1s, CL_FALSE,0, sizeof(int)*size_pop/2, p1s, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, buf_p2s, CL_FALSE,0, sizeof(int)*size_pop/2, p2s, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, buf_cross_rates, CL_FALSE,0, sizeof(float)*code_len, cross_rates, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, buf_best_p, CL_FALSE,0, sizeof(int)*code_len, best_p, 0, NULL, NULL);
const char* crossover_fun_str=NULL;
const char* pop_merge_fun_str=NULL;
const char* fitness_fun_str=NULL;
const char* pop_produce_fun_str=NULL;
const char* mutation_fun_str=NULL;
std::string sourcestr;
std::string sourcestr_pop;
std::string sourcestr_fit;
std::string sourcestr_pop_produce;
std::string sourcestr_mutation;
convertToString("crossover.cl", sourcestr);
crossover_fun_str = sourcestr.c_str();
convertToString("pop_merge.cl", sourcestr_pop);
pop_merge_fun_str = sourcestr_pop.c_str();
convertToString("fitness.cl", sourcestr_fit);
fitness_fun_str = sourcestr_fit.c_str();
convertToString("pop_produce.cl", sourcestr_pop_produce);
pop_produce_fun_str = sourcestr_pop_produce.c_str();
convertToString("mutation.cl", sourcestr_mutation);
mutation_fun_str = sourcestr_mutation.c_str();
cl_program program_crossover = clCreateProgramWithSource(context, 1,(const char**)&crossover_fun_str, NULL, &status);
cl_program program_pop_merge = clCreateProgramWithSource(context, 1,(const char**)&pop_merge_fun_str, NULL, &status);
cl_program program_fitness = clCreateProgramWithSource(context, 1,(const char**)&fitness_fun_str, NULL, &status);
cl_program program_pop_produce = clCreateProgramWithSource(context, 1,(const char**)&pop_produce_fun_str, NULL, &status);
cl_program program_mutation = clCreateProgramWithSource(context, 1,(const char**)&mutation_fun_str, NULL, &status);
status = clBuildProgram(program_crossover, numDevices, devices,NULL, NULL, NULL);
status = clBuildProgram(program_pop_merge, numDevices, devices,NULL, NULL, NULL);
status = clBuildProgram(program_fitness, numDevices, devices,NULL, NULL, NULL);
status = clBuildProgram(program_pop_produce, numDevices, devices,NULL, NULL, NULL);
status = clBuildProgram(program_mutation, numDevices, devices,NULL, NULL, NULL);
cl_kernel kernel_crossover;
kernel_crossover = clCreateKernel(program_crossover, "crossover", &status);
cl_kernel kernel_pop_merge;
kernel_pop_merge = clCreateKernel(program_pop_merge, "pop_merge", &status);
cl_kernel kernel_fitness;
kernel_fitness = clCreateKernel(program_fitness, "fitness", &status);
cl_kernel kernel_pop_produce;
kernel_pop_produce = clCreateKernel(program_pop_produce, "pop_produce", &status);
cl_kernel kernel_mutation;
kernel_mutation = clCreateKernel(program_mutation, "mutation", &status);
size_t globalWorkSize[1];
globalWorkSize[0] = pop_num*2;
size_t global[2];
status = clSetKernelArg(kernel_crossover, 0, sizeof(int), &p_num);
status = clSetKernelArg(kernel_crossover, 1, sizeof(int), &code_len);
status = clSetKernelArg(kernel_crossover, 2, sizeof(float), &pc);
status = clSetKernelArg(kernel_crossover, 3, sizeof(cl_mem), &buf_cross_rates);
status = clSetKernelArg(kernel_crossover, 4, sizeof(cl_mem), &buf_p1s);
status = clSetKernelArg(kernel_crossover, 5, sizeof(cl_mem), &buf_p2s);
status = clSetKernelArg(kernel_crossover, 6, sizeof(cl_mem), &buf_children1);
status = clSetKernelArg(kernel_crossover, 7, sizeof(cl_mem), &buf_children2);
status = clSetKernelArg(kernel_mutation, 0, sizeof(int), &pop_num);
status = clSetKernelArg(kernel_mutation, 1, sizeof(int), &code_len);
status = clSetKernelArg(kernel_mutation, 2, sizeof(cl_mem), &buf_mutation_vector);
status = clSetKernelArg(kernel_mutation, 3, sizeof(cl_mem), &buf_population_extend);
status = clSetKernelArg(kernel_pop_merge, 0, sizeof(int), &p_num);
status = clSetKernelArg(kernel_pop_merge, 1, sizeof(int), &pop_num);
status = clSetKernelArg(kernel_pop_merge, 2, sizeof(int), &code_len);
status = clSetKernelArg(kernel_pop_merge, 3, sizeof(cl_mem), &buf_population_extend);
status = clSetKernelArg(kernel_pop_merge, 4, sizeof(cl_mem), &buf_population);
status = clSetKernelArg(kernel_pop_merge, 5, sizeof(cl_mem), &buf_children1);
status = clSetKernelArg(kernel_pop_merge, 6, sizeof(cl_mem), &buf_children2);
status = clSetKernelArg(kernel_fitness, 0, sizeof(int), &pop_num);
status = clSetKernelArg(kernel_fitness, 1, sizeof(int), &code_len);
status = clSetKernelArg(kernel_fitness, 2, sizeof(cl_mem), &buf_best_p);
status = clSetKernelArg(kernel_fitness, 3, sizeof(cl_mem), &buf_population_extend);
status = clSetKernelArg(kernel_fitness, 4, sizeof(cl_mem), &buf_fitness);
status = clSetKernelArg(kernel_pop_produce, 0, sizeof(int), &pop_num);
status = clSetKernelArg(kernel_pop_produce, 1, sizeof(int), &code_len);
status = clSetKernelArg(kernel_pop_produce, 2, sizeof(cl_mem), &buf_fitness_sorted);
status = clSetKernelArg(kernel_pop_produce, 3, sizeof(cl_mem), &buf_population_extend);
status = clSetKernelArg(kernel_pop_produce, 4, sizeof(cl_mem), &buf_population);
status = clSetKernelArg(kernel_pop_produce, 5, sizeof(cl_mem), &buf_fitness);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_crossover, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
mut_vec(mutation_vector,code_len,pm);
status = clEnqueueWriteBuffer(cmdQueue, buf_mutation_vector, CL_FALSE,0, sizeof(int)*code_len, mutation_vector, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_mutation, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_pop_merge, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_fitness, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_fitness, CL_TRUE, 0,sizeof(int)*pop_num*2, fitness, 0, NULL, NULL);
for(int i=0;i<pop_num*2;i++)
fitness_sorted[i]=fitness[i];
bubblesort(fitness_sorted,pop_num*2);
status = clEnqueueWriteBuffer(cmdQueue, buf_fitness_sorted, CL_FALSE,0, sizeof(int)*pop_num*2, fitness_sorted, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_pop_produce, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_population, CL_TRUE, 0,sizeof(int)*size_pop, population, 0, NULL, NULL);
time_t start,end;
start=time(NULL);
float best_fit;
float origin_best_fit;
for(int g=0;g<evol_num;g++)
{
get_cross_rates(cross_rates,code_len);
status = clEnqueueWriteBuffer(cmdQueue, buf_cross_rates, CL_FALSE,0, sizeof(int)*code_len, cross_rates, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_crossover, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
mut_vec(mutation_vector,code_len,pm);
status = clEnqueueWriteBuffer(cmdQueue, buf_mutation_vector, CL_FALSE,0, sizeof(int)*code_len, mutation_vector, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_mutation, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_pop_merge, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_fitness, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_fitness, CL_TRUE, 0,sizeof(int)*pop_num*2, fitness, 0, NULL, NULL);
if(g==0)
origin_best_fit=max(fitness,pop_num*2);
for(int i=0;i<pop_num*2;i++)
fitness_sorted[i]=fitness[i];
bubblesort(fitness_sorted,pop_num*2);
status = clEnqueueWriteBuffer(cmdQueue, buf_fitness_sorted, CL_FALSE,0, sizeof(int)*pop_num*2, fitness_sorted, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(cmdQueue, kernel_pop_produce, 1, NULL,globalWorkSize, NULL, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_population, CL_TRUE, 0,sizeof(int)*size_pop, population, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_fitness, CL_TRUE, 0,sizeof(int)*pop_num*2, fitness, 0, NULL, NULL);
bubblesort(fitness,pop_num*2);
best_fit=max(fitness,pop_num*2);
printf("%d generation:best fitness:%f\n",g,best_fit);
if(best_fit==4)
break;
}
end=time(NULL);
printf("time cost:%ld s\n",end-start);
clEnqueueReadBuffer(cmdQueue, buf_children1, CL_TRUE, 0,
sizeof(int)*size_pop/2, children1, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_children2, CL_TRUE, 0,
sizeof(int)*size_pop/2, children2, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, buf_population_extend, CL_TRUE, 0,
sizeof(int)*size_pop*2, population_extend, 0, NULL, NULL);
int result = 1;
int* best_one=get_best_individual(population,pop_num,code_len,fitness,best_fit);
printf("The best fitness of original population:%f\n",origin_best_fit);
printf("The best fitness of final population:%f\n",best_fit);
clReleaseKernel(kernel_crossover);
clReleaseProgram(program_crossover);
clReleaseCommandQueue(cmdQueue);
clReleaseMemObject(buf_population);
clReleaseMemObject(buf_population_extend);
clReleaseMemObject(buf_children1);
clReleaseMemObject(buf_children2);
clReleaseMemObject(buf_best_p);
clReleaseContext(context);
free(population);
free(population_extend);
free(children1);
free(children2);
free(cross_rates);
free(best_p);
free(platforms);
free(devices);
return 0;
}
/*
 * Reads the whole file `filename` (opened in binary mode) into `s`.
 *
 * filename - path of the file to read.
 * s        - receives the file contents, including any embedded NUL bytes.
 *            It is left unchanged when the function fails.
 *
 * Returns 0 on success. Returns 1 when the file cannot be opened (an error
 * message is printed), when its size cannot be determined, or when fewer
 * bytes than expected could be read.
 */
int convertToString(const char *filename, std::string& s)
{
    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    f.seekg(0, std::fstream::end);
    const std::streamoff length = f.tellg();
    if (length < 0)
        return 1;   /* tellg() reports failure as -1 */
    f.seekg(0, std::fstream::beg);

    /* Read straight into a string of the right size; no manual new/delete. */
    std::string contents((size_t)length, '\0');
    if (length > 0)
        f.read(&contents[0], (std::streamsize)length);
    if (!f)
        return 1;   /* short read or stream error */

    s.swap(contents);
    return 0;
}
/*
 * Prints an int matrix to stdout for debugging.
 *
 * m           - matrix data, dim1 rows of dim2 values stored row after row.
 * dim1, dim2  - number of rows and columns.
 * matrix_name - label printed on the first line.
 *
 * Each row is printed on its own line, every value followed by one space.
 */
void show_matrix(int* m,int dim1,int dim2,char* matrix_name)
{
    printf("matrix name is :%s \n", matrix_name);

    int cell = 0;   /* running index, equals row*dim2 + col */
    int row = 0;
    while (row < dim1) {
        int col = 0;
        while (col < dim2) {
            printf("%d ", m[cell]);
            ++cell;
            ++col;
        }
        printf("\n");
        ++row;
    }
}
/*
 * Prints a float matrix to stdout for debugging.
 *
 * m           - matrix data, dim1 rows of dim2 values stored row after row.
 * dim1, dim2  - number of rows and columns.
 * matrix_name - label printed on the first line.
 *
 * Each row is printed on its own line; values use three decimals and are
 * each followed by one space.
 */
void show_matrix_float(float* m,int dim1,int dim2,char* matrix_name)
{
    printf("matrix name is :%s \n", matrix_name);

    int cell = 0;   /* running index, equals row*dim2 + col */
    int row = 0;
    while (row < dim1) {
        int col = 0;
        while (col < dim2) {
            printf("%.3f ", m[cell]);
            ++cell;
            ++col;
        }
        printf("\n");
        ++row;
    }
}
交叉操作核函数
/*
 * Crossover kernel (1-D launch; work-item ids >= p_num do nothing).
 *
 * Work-item i builds child pair i from parent pair i. For every gene j:
 *   cross_rates[j] >  pc : children1 takes the gene of p1s, children2 of p2s
 *   otherwise            : the two genes are swapped between the children
 *
 * p_num          - number of parent pairs.
 * individual_len - number of genes per individual.
 * pc             - threshold compared against cross_rates[j].
 * cross_rates    - one value per gene position.
 * p1s, p2s       - parents, individual_len ints per individual.
 * children1/2    - outputs, same layout as the parents.
 */
__kernel void crossover(
const int p_num,
const int individual_len,
const float pc,
__global float* cross_rates,
__global int* p1s,
__global int* p2s,
__global int* children1,
__global int* children2)
{
    const int pair = get_global_id(0);
    if (pair >= p_num)
        return;

    const int base = pair * individual_len;
    for (int gene = 0; gene < individual_len; ++gene)
    {
        const int at = base + gene;
        const int keep = cross_rates[gene] > pc;
        children1[at] = keep ? p1s[at] : p2s[at];
        children2[at] = keep ? p2s[at] : p1s[at];
    }
}
变异操作核函数
/*
 * Mutation kernel (1-D launch; work-item ids >= 2*pop_num do nothing).
 *
 * Work-item i adds mutation_vector[j] to gene j of individual i in
 * population_extend, for every gene position j.
 *
 * pop_num           - population size; 2*pop_num individuals are processed.
 * code_len          - number of genes per individual.
 * mutation_vector   - one offset per gene position.
 * population_extend - individuals stored as code_len ints each; updated in place.
 */
__kernel void mutation(
const int pop_num,
const int code_len,
__global int* mutation_vector,
__global int* population_extend)
{
    const int individual = get_global_id(0);
    if (individual >= pop_num * 2)
        return;

    __global int* genes = population_extend + individual * code_len;
    for (int gene = 0; gene < code_len; ++gene)
        genes[gene] += mutation_vector[gene];
}
选择操作核函数
/*
 * Selection kernel (1-D launch; work-item ids >= pop_num do nothing).
 *
 * Work-item i scans all 2*pop_num entries of `fitness`. Every entry j whose
 * value equals fitness_sorted[i] has its individual copied from
 * population_extend into slot i of `population`, so when several entries
 * match, the last one scanned is the one left in the slot.
 *
 * pop_num           - number of slots in the output population.
 * code_len          - number of genes per individual.
 * fitness_sorted    - fitness value wanted for each output slot.
 * population_extend - candidate individuals, code_len ints each.
 * population        - output population, code_len ints each.
 * fitness           - fitness of each candidate in population_extend.
 */
__kernel void pop_produce(
const int pop_num,
const int code_len,
__global float* fitness_sorted,
__global int* population_extend,
__global int* population,
__global float* fitness)
{
    const int slot = get_global_id(0);
    if (slot >= pop_num)
        return;

    const int candidates = pop_num * 2;
    for (int cand = 0; cand < candidates; ++cand)
    {
        if (!(fitness_sorted[slot] == fitness[cand]))
            continue;

        for (int gene = 0; gene < code_len; ++gene)
            population[slot * code_len + gene] = population_extend[cand * code_len + gene];
    }
}