以前从来没玩过遗传算法,这次是逼上梁山,因为RNNA实在太简单,没法写出上千字的report。实在是为了算法而算法,并不是为了得到什么更优的解。。。汗自己的学习态度一下。。。anyway,GA确实是可以得到一个比较优的解,由于时间关系,这次我没有认真研究它,只是囫囵吞枣,得了点皮毛中的皮毛而已。理解错了,请大侠指正。
先说无性繁殖,无性繁殖很简单拉,其中一种叫local search。就是把找到的路径取其中一段出来,交换其中点的位置,看看能不能得到更优的子路径。其实本质上就是暴力求解,只是因为500个城市太过庞大,500!比天文数字还天文数字。而取一段通常只有几个点,即使全排列也是可以接受的,但还是有极限,超过10个点就不太行了,10!已经是300w+,差不多到机器极限拉。
这里是一个local search的sample,在RNNA的结果上进行优化。结果差不多是184k+,比RNNA好一点了吧。。。
/* 500 cities Hamilton Path */
/* Repetitive Nearest Neighbour Algorithm (RNNA) */
/* with Local Search */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define CITYSIZE 500
/* 15000 > distance(0,0,10000,10000) */
#define MAXDIST 15000
/* 7500000 > 15000 * (500 - 1) */
#define MAXTOTAL 7500000
/* local search factor
* don't be greedy, greater number causes deeper recursive
*/
#define STEP 11
/* cities informatin */
typedef struct S_c {
int id;
int x;
int y;
int visited;
} CITY;
/* a linked-list record local path generated by perm() */
struct S_path {
int* path;
struct S_path* next;
struct S_path* prev;
} *head, *tail;
int tour[CITYSIZE], // best path
weight[CITYSIZE][CITYSIZE]={0}, // distances between cities
total = MAXTOTAL; // best path length
void showCity(CITY*,int); // show city coordinate,for test
void showDist(int[][CITYSIZE],int); // show distances,for test
void showPath(int*,int,int); // show found path
double distance(int,int,int,int); // distance formula
void perm(int*,int,int,int); // generate a permutation
void swap(int*,int*); // for perm()
void localSearch(int);
int findrepeat(); // check if result valid
int weightOf(int* array);
int main(){
time_t start, done;
time(&start);
FILE *fin;
int i, j;
CITY city[CITYSIZE];
int tmptour[CITYSIZE];
fin = fopen("cities.dat", "r");
/* read cities locations */
char dump; // useless information, used to omit ':'
for(i = 0; i < CITYSIZE; ++i){
fscanf(fin, "%d %c %d", &city[i].id, &dump, &city[i].x);
fscanf(fin, "%c %d", &dump, &city[i].y);
}
/* calculate cities distance */
for(i = 0; i < CITYSIZE; ++i){
for(j = 0; j < CITYSIZE; ++j){
if(i < j){
weight[i][j] = (int)distance(city[i].x, city[i].y,
city[j].x, city[j].y);
weight[j][i] = weight[i][j];
}//if end
}//for end
}// for end
int tmptotal = MAXTOTAL, startCty = 0;
for(; startCty < CITYSIZE; ++startCty){
/* if found better path */
if(tmptotal < total){
total = tmptotal;
memcpy(tour, tmptour, sizeof(tour));
showPath(tour, CITYSIZE, total);
}
/* initialization */
tmptotal = 0;
for(i = 0; i < CITYSIZE; ++i) city[i].visited = 0;
int nextCty, currCty = startCty;
tmptour[0] = startCty;
city[startCty].visited = 1;
for(i = 1; i < CITYSIZE; ++i){
int cost = MAXDIST;
for(j = 0; j < CITYSIZE; ++j){
if((cost > weight[currCty][j]) && // currCty~j is better and
(!city[j].visited) && // j hasn't been visited and
(j != currCty)){ // j is not currCty
cost = weight[currCty][j];
nextCty = j;
}// if end
}// for end
// step to next city
currCty = nextCty;
// add this city into path
tmptour[i] = currCty;
city[currCty].visited = 1;
// accumulate path length
tmptotal += cost;
}// for end
}// for end
/* RNNA finished here, local search start */
/* TODO: connect the start city with end city to form a circle path,
* then break the longest one in the loop.
*/
head = (struct S_path*)malloc(sizeof(struct S_path));
tail = (struct S_path*)malloc(sizeof(struct S_path));
head->next = tail;
head->prev = NULL;
tail->prev = head;
tail->next = NULL;
int step = 3;
for(; step < STEP; step++){
localSearch(step);
}
time(&done);
printf("execution time = %d seconds/n",done - start);
}
void showDist(int array[][CITYSIZE], int size){
int i, j;
for(i = 0; i < size; ++i){
for(j = 0; j < size; ++j){
printf("%4d,", array[i][j]);
}
printf("/n");
}
}
void showCity(CITY* array, int size){
int i;
for(i = 0; i < size; ++i){
printf("%d:%d:%d:%d/n", array[i].id,
array[i].x,
array[i].y,
array[i].visited);
}// for end
}
void showPath(int* array, int size, int total){
int i;
if(findrepeat()){printf("Wrong answer!/n");exit(0);}
if(total!=weightOf(array)){
printf("wrong answer!/n");exit(0);}
printf("%d:", total);
for(i = 0; i < size - 1; ++i) printf("%d,", array[i]);
printf("%d/n", array[i]);
}
double distance(int x1, int y1, int x2, int y2){
return sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2));
}
void localSearch(int step){
int i = 0;
for(; (i + step) < CITYSIZE; i = i + step){
int j = 0;
int sublength = 0, newweight = 0, currBest;
for(; j < step; ++j) sublength += weight[tour[i+j]][tour[i+j+1]];
currBest = sublength;
/* for sequence 01234, we need a permutation of p(123) only.
* so, the lower and upper bound would be i + 1 and i + step - 1.
*/
//printf("perm(%d~%d):/n", i+1,i+step-1);
perm(tour, i + 1, i + step - 1, i + 1);
// check all new path in permutation.
struct S_path* onePath = tail->prev;
while(onePath != head){
onePath->prev->next = tail;
tail->prev = onePath->prev;
newweight = weight[tour[i]][onePath->path[0]] +
weight[onePath->path[step-2]][tour[i+step]];
for(j = 0; j < step - 2; ++j)
newweight += weight[onePath->path[j]][onePath->path[j+1]];
if(newweight < currBest){
printf("better route found:%d, original:%d/n", newweight, currBest);
currBest = newweight;
// update tour
printf("this path is: %d,", i);
for(j = 0; j < step - 1; ++j)
printf("%d,",onePath->path[j]);
printf("%d/n", i+step);
memcpy(tour+i+1,onePath->path,sizeof(int)*(step-1));
if(findrepeat()){printf("bingo!/n");exit(0);}
}
free(onePath->path);
free(onePath);
onePath = tail->prev;
}// while end
// update total length
total = total - sublength + currBest;
}// for end
showPath(tour,CITYSIZE, total);
}
/* variable 'original' record the start point(lower bound) in array,
* used for output, when invoke perm(), original and k is same.
*/
void perm(int* list, int k, int m, int original){
int i, j;
if(k > m){// exist a permutation...
struct S_path* myPath = (struct S_path*)malloc(sizeof(struct S_path));
struct S_path* tmp = head->next;
myPath->path = (int*)malloc(sizeof(int)*(m - original + 1));
head->next = myPath;
myPath->prev = head;
myPath->next = tmp;
tmp->prev = myPath;
for(i = original; i <= m; ++i) {myPath->path[i-original] = list[i];}
} else {
for(j = k; j <= m; ++j){
swap(&list[k], &list[j]);
perm(list, k + 1, m, original);
swap(&list[k], &list[j]);
}
}
}
void swap(int* a, int* b){
int temp = *a;
*a = *b;
*b = temp;
}
int findrepeat(){
int i, j;
for(i = 0; i < CITYSIZE; ++i){
int tmp = 0;
for(j = 0; j < CITYSIZE; ++j) if(i == tour[j]) tmp++;
if(tmp > 1){printf("error: repeat city!/n");return 1;}
}
return 0;
}
int weightOf(int* array){
int w = 0, i = 0;
for(; i < CITYSIZE - 1; ++i)
w += weight[array[i]][array[i+1]];
return w;
}
接下来,说说有性繁殖。这里谈的是最最简单的情况。有性繁殖包括crossover和mutation。 crossover就是取两条路径,交换他们的一部分,产生两条新路径看看有没有更好的结果。mutation则是在crossover的基础上,随机的产生一定概率的突变,说不定会得到一个更优解或者更优的parent,从而在下一代里得到更优解。本质上。。。就是撞大运拉。。。比较sophisticated的GA算法会在crossover和mutation时采用一些复杂的策略, 不过整体上都是一样的。
这里给出一个crossover的sample,连mutation都不考虑:
void ga(int separator){
time_t seed;
time(&seed);
srand(seed);
int gen = 0;
for(; gen < 1000000; ++gen){
printf("epoch %d:/n", gen);
rand(); // dump first number
// randomly pick two path index
int index1 = (int)((double)rand()*500/RAND_MAX);
int index2 = (int)((double)rand()*500/RAND_MAX);
// get the two paths copies
int path1[CITYSIZE], path2[CITYSIZE];
memcpy(path1, all500paths[index1], sizeof(path1));
memcpy(path2, all500paths[index2], sizeof(path2));
// exchange two fragments to get next epoch
int* frag1 = (int*)malloc(sizeof(int)*separator);
memcpy(frag1, path1, sizeof(int)*separator);
memcpy(path1, path2, sizeof(int)*separator);
memcpy(path2, frag1, sizeof(int)*separator);
free(frag1);
// re-generate path1 & path2
int i, check1[CITYSIZE] = {0}, check2[CITYSIZE] = {0};
// separator~CITYSIZE-1 is original path, doesn't changed
for(i = separator; i < CITYSIZE; ++i){
check1[path1[i]]++;
check2[path2[i]]++;
}
// check new part in path
// 0~seprator-1 is new part copied from the other one
for(i = 0; i < separator; ++i){
if(check1[path1[i]]){
int j = 0;
while(check1[j])j++; // step to first non-zero position
check1[j]++;
path1[i] = j;
}else check1[path1[i]]++;
if(check2[path2[i]]){
int j = 0;
while(check2[j])j++;
check2[j]++;
path2[i] = j;
}else check2[path2[i]]++;
}
int mytotal1 = 0, mytotal2 = 0;
// calculate new path length
for(i = 0; i < CITYSIZE - 1; ++i){
mytotal1 += weight[path1[i]][path1[i+1]];
mytotal2 += weight[path2[i]][path2[i+1]];
}
// update if better route found
if(mytotal1 < all500weights[path1[0]]){
printf("better found in path%d: %d/n", path1[0], mytotal1);
all500weights[path1[0]] = mytotal1;
memcpy(all500paths[path1[0]], path1, sizeof(int)*CITYSIZE);
}
if(mytotal2 < all500weights[path2[0]]){
printf("better found in path%d: %d/n", path2[0], mytotal2);
all500weights[path2[0]] = mytotal2;
memcpy(all500paths[path2[0]], path2, sizeof(int)*CITYSIZE);
}
}// for end, one epoch end
}
check[]数组用来再交换后检查重复的city,如果发现city重复了,就用check[]里第一个为零的city来替换,all500paths保存了用NNA产生的路径,all500weights时相应的路径长度。separator标明从哪里断开来产生下一代。
如果想得到更随机的结果,separator本身也是可以是一个随机数。另外,直接在500条路径用ga似乎并不是一个很好的population,在上面选几十条比较短的路径来做初始的population可能会比较好。我没有试过,不敢乱说拉。上面的代码跑1000w代也跑不进19w,汗。。。。我的ga好烂。。。。
还有一种思路完全不同的算法,Ant Colony Optimization,蚁群优化。想出来的人是通过观察蚂蚁觅食得到的灵感。原理是蚂蚁在经过的路上会留下一种叫信息素的东东,就当它是一种气味吧。蚂蚁会根据气味的浓淡来决定走哪条路线。某条路上味道越浓,蚂蚁挑这条路线的概率越大。听上去很简单,也很不可思议吧。实事证明,ACO的效果比GA不遑多让。可惜,算法和原理是两码事,我还没看明白。所以也给不出代码拉。。。hoho,有时间再说吧。
这里有关TSP,GA和ACO的算法都是来自internet,只有代码是原创。写给自己看,就不写reference了。