When reading Evolutionary Computation for Modeling and Optimization[1], I found following problem in section 1.2.3:
A string evolver is an evolutionary algorithm that tries to match a reference string from a population of random strings. The underlying character set of the string evolver is the alphabet from which the strings are drawn.
The solution given in the context is:
- Start with a reference string and a population of random strings.
- The fitness of a string is the number of positions in which it has the same character as the reference string.
- To evolve the population, split it into small random groups called tournaments.
- Copy the most fit string(break ties by picking at random among the most fit strings) over the least fit string in each tournament.
- Then change one randomly chosen character in each copy(mutation).
- Repeat until an exact match with the reference string is obtained.
When trying this solution, I noticed that it won't converge even after a long time. Therefore, I modified the crossover and mutation strategy.A subset of population(strings) will be selected based on fitness in a tournament, and the bottom-ranked 50% of each tournament will be deleted. New population is formed by crossing the remaining high 20 individuals in a tournament, while the high-ranked parents are kept unchanged[2]. Finally, a random selected string in a tournament will be mutated.
My codes are:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define POP 20
#define TNMT 4 // Total number of tournaments
#define MAXSTR 80
char strPop[POP][MAXSTR]; // population of strings
char* REF="Hello,world!"; // Reference string
char* mostFit[TNMT]; // the most fit string of each tournament
char* leastFit[TNMT]; // the least fit string of each
tournament
char GenRandChar()
{
return (char)(32+rand()%96);
}
char * GenRandStr(char* str, unsigned int len)
{
int i;
for(i=0;i<len-1;++i)
{
str[i]=GenRandChar();
}
str[i]='';
return str;
}
// Copy the most fit string(break ties by picking at
// random among the most fit strings) over the least
// fit string in each tournament.
unsigned int CrossoverM2L(char* strMostFit, char* strLeastFit, unsigned int len)
{
int i,p;
char tmp;
// Generate random position. Make sure the string
// won't be broken at the first or last character.
p=rand()%(len-2)+1;
for(i=0;i<p;++i)
{
tmp=strMostFit[i];
strMostFit[i]=strLeastFit[i];
strLeastFit[i]=tmp;
}
return p;
}
// Change one randomly chosen character in each copy.
unsigned int Mutate(char* str,unsigned int len)
{
int p;
p=rand()%len;
str[p]=GenRandChar();
return p;
}
// Count the number of identical chars between ref and str.
unsigned int GetFitness(char* str,unsigned int len)
{
int i;
int cnt=0;
for(i=0;i<len;++i)
{
if(REF[i]==str[i])
cnt++;
}
return cnt;
}
// Find out the most and least fit strings in a tournament.
// tourn -- the number of tourn, starting from 0.
void GetFitOfTnmt(char* mostFit[],char* leastFit[],unsigned int tourn)
{
int i,fit;
char* str;
int most_fit,least_fit;
// Make sure the most and least fit strings are different
mostFit[tourn]=strPop[tourn*POP/TNMT];
most_fit=GetFitness(mostFit[tourn],strlen(REF));
leastFit[tourn]=strPop[tourn*POP/TNMT+1];
least_fit=GetFitness(leastFit[tourn],strlen(REF));
for(i=1;i<POP/TNMT;++i)
{
str=strPop[tourn*POP/TNMT+i];
fit=GetFitness(str,strlen(REF));
if(most_fitfit)
{
least_fit=fit;
leastFit[tourn]=str;
}
}
}
// Get the most fit string of the population
char* GetMostFit(char* mostFit[])
{
int i,fit;
char* strFit=mostFit[0];
int most_fit=GetFitness(strFit,strlen(REF));
for(i=1;i<TNMT;++i)
{
fit=GetFitness(mostFit[i],strlen(REF));
if(most_fit<fit)
{
most_fit=fit;
strFit=mostFit[i];
}
}
return strFit;
}
// Sort the strings in a tournament by fitness.
void SortTournByFit(unsigned int tourn)
{
int tmpInt;
unsigned int fit[POP/TNMT];
int i,j;
int len=strlen(REF);
char tmpStr[MAXSTR];
// Get the fitness of each string in this tournament
for(i=0;i<POP/TNMT;i++)
{
fit[i]=GetFitness(strPop[tourn*POP/TNMT+i],len);
}
// Sort strings by fitness
for(i=0;i<POP/TNMT;i++)
{
for(j=i+1;j<POP/TNMT;j++)
{
if(fit[i]<fit[j])
{
tmpInt=fit[i];
fit[i]=fit[j];
fit[j]=tmpInt;
strcpy(tmpStr,strPop[tourn*POP/TNMT+i]);
strcpy(strPop[tourn*POP/TNMT+i],strPop[tourn*POP/TNMT+j]);
strcpy(strPop[tourn*POP/TNMT+j],tmpStr);
}
} // end of for j
} // end of for i
}
// Crossover the top half strings.
// Remove the second half strings after sorting, keep the top half,
// and generate children by crossing over top half strings.
void CrossoverByFit(unsigned int tourn)
{
int i,j,p,len;
char* strTmp;
SortTournByFit(tourn);
// Generate random position. Make sure the string
// won't be broken at the first or last character.
len=strlen(REF);
p=rand()%(len-2)+1;
for(i=0;i<(int)POP/TNMT/2;i+=2)
{
for(j=0;j
p;--j)
{
strPop[(tourn+1)*POP/TNMT-1-i][j]=strPop[tourn*POP/TNMT+i+1][j];
strPop[(tourn+1)*POP/TNMT-2-i][j]=strPop[tourn*POP/TNMT+i][j];
}
}
}
// Print population by tournaments.
void DumpPop()
{
int i,j;
char* curStr;
for(i=0;i<TNMT;i++)
{
printf("Tournament %d:\n",i);
for(j=0;j<POP/TNMT;j++)
{
curStr=strPop[i*POP/TNMT+j];
printf("\t%s\tFitness: %d\n",curStr,GetFitness(curStr,strlen(curStr)));
}
printf("\n");
}
}
//
// MAIN START HERE
//
int main()
{
int LEN=strlen(REF);
int i,j,p;
long cnt=0;
char * strFit=NULL;
// Initionlization
for(i=0;i<POP;++i)
{
GenRandStr(strPop[i],LEN);
}
printf("---------------- Original Population ---------------\n");
DumpPop();
for(i=0;i<TNMT;++i)
GetFitOfTnmt(mostFit,leastFit,i);
strFit=GetMostFit(mostFit);
while(GetFitness(strFit,LEN) != LEN)
//while(GetFitness(strFit,LEN)<5)
{
printf("\n---------------- Generation %d ----------------\n",cnt);
DumpPop();
printf("%s\tReference\n",REF);
printf("%s\tGen: %d\tFitness: %d\n",strFit,cnt++,GetFitness(strFit,LEN));
for(i=0;i<TNMT;++i)
{
// Crossover
//CrossoverM2L(mostFit[i],leastFit[i],LEN);
CrossoverByFit(i);
// Mutation
p=rand()%(POP/TNMT);
Mutate(strPop[i*POP/TNMT+p],LEN);
// Re-calculate the fitness
GetFitOfTnmt(mostFit,leastFit,i);
}
strFit=GetMostFit(mostFit);
}
strFit=GetMostFit(mostFit);
printf("\n---------------- Generation %d ----------------\n",cnt);
DumpPop();
printf("%s\tReference\n",REF);
printf("%s\tGen: %d\tFitness: %d\n",strFit,cnt++,GetFitness(strFit,LEN));
return 0;
}
And the output is:
---------------- Generation 0 ----------------
Tournament 0:
F~AKK[b;tV] Fitness: 0
\\agA_q>vr Fitness: 0
OGG&Gy*(|yj Fitness: 0
6s[rEA8|45/ Fitness: 0
W.e[:bYXlCz Fitness: 0
Tournament 1:
6n?y9o??VN# Fitness: 0
8mmZ~f:Dfn< Fitness: 0
i-nV!P`{oQ Fitness: 0
8%p%)\3-j;( Fitness: 0
92S2}&jU"f Fitness: 0
Tournament 2:
38IrA~lrHq| Fitness: 0
RF51Vo7V9]v Fitness: 0
Z(tUX<w|!{m Fitness: 1
qNoPy'a;OS] Fitness: 0
1`]em-+SM~~ Fitness: 0
Tournament 3:
o^f(T&zQC3Q Fitness: 0
($g[j@ Asi Fitness: 0
@gb#E`!QGUb Fitness: 0
%H/>N)Kt_Pm Fitness: 0
i>Z+F0@6/'L Fitness: 0
Hello,world! Reference
Z(tUX<w|!{m Gen: 0 Fitness: 1
---------------- Generation 1 ----------------
Tournament 0:
F~AKK[b;tVx Fitness: 0
\\agA_q>vr Fitness: 0
OGG&Gy*(|yj Fitness: 0
\\agA_q>5] Fitness: 0
F~AKK[b;tCr Fitness: 0
Tournament 1:
6n?y9o??VN# Fitness: 0
kmmZ~f:Dfn< Fitness: 0
i-nV!P`{oQ Fitness: 0
8mm%9o??VN# Fitness: 0
6n?2~f:Dfn< Fitness: 0
Tournament 2:
Z(tUX<w|!{m Fitness: 1
RF51Vo7V9]v Fitness: 0
38IrA~lrHq| Fitness: 0
RFoUX<w|r{m Fitness: 2
Z(]1Vo7V9]v Fitness: 0
Tournament 3:
o^f(T&zQC3Q Fitness: 0
($g[j@ Asi Fitness: 0
@gb#E`!QGUb Fitness: 0
($/(T&zQC3Q Fitness: 0
@^Z[j@ Asi Fitness: 0
Hello,world! Reference
RFoUX<w|r{m Gen: 1 Fitness: 2
---------------- Generation 2 ----------------
Tournament 0:
F~AKK[b;tVx Fitness: 0
\\agA_q>vr Fitness: 0
OGG&Gy*(|yj Fitness: 0
\\agA_q>v] Fitness: 0
F~AeK[b;tVr Fitness: 0
Tournament 1:
Pn?y9o??VN# Fitness: 0
kmmZ~f:Dfn< Fitness: 0
i-nV!P`{oQ Fitness: 0
kmmZ~f??VN# Fitness: 0
6n?y9o:Dfn< Fitness: 0
Tournament 2:
RFoDX<w|r{m Fitness: 2
Z(tUX<w|!{m Fitness: 1
38IrA~lrHq| Fitness: 0
Z(t1X<w|r{m Fitness: 2
RFo1X<w|!{m Fitness: 1
Tournament 3:
o^f(T&zQC3Q| Fitness: 0
($g[j@ Asi Fitness: 0
@gb#E`!QGUb Fitness: 0
($f(T&zQC3Q Fitness: 0
o^g[j@ Asi Fitness: 0
Hello,world! Reference
RFoDX<w|r{m Gen: 2 Fitness: 2
---------------- Generation 3 ----------------
Tournament 0:
F~AKK[b;tVx Fitness: 0
\\agA_q>vr Fitness: 0
OGG&Gy*(|yj Fitness: 0
\\KK[b;tVx Fitness: 0
F~Aag~_q>vr Fitness: 0
Tournament 1:
Pn?y9o??VN# Fitness: 0
kmmZ~f:Dfn<@ Fitness: 0
i-nV!P`{oQ Fitness: 0
km?y9o??VN# Fitness: 0
PnmZ~f:Dfn< Fitness: 0
Tournament 2:
RFoDX<w|r{m Fitness: 2
Z(t1X<w|r{m Fitness: 2
Z(tUX~w|!{m Fitness: 1
Z(oDX<w|r{m Fitness: 2
RFI1X<w|r{m Fitness: 2
Tournament 3:
o^f(T&zQC3Q| Fitness: 0
($g[j@ Asi Fitness: 0
@gb#E`!QGUb Fitness: 0
($g[W@ QC3Q| Fitness: 0
o^f(T&zAsi Fitness: 0
Hello,world! Reference
RFoDX<w|r{m Gen: 3 Fitness: 2
---------------- Generation 4 ----------------
Tournament 0:
F~AKK[b;tVx Fitness: 0
\\JgA_q>vr Fitness: 0
OGG&Gy*(|yj Fitness: 0
\\KK[b;tVx Fitness: 0
F~AagA_q>vr Fitness: 0
Tournament 1:
Pn?y9o??VN# Fitness: 0
kmmZ~f:Dfn<@ Fitness: 0
i-nV!P`{oQ Fitness: 0
kmmZ~o??VN# Fitness: 0
Pnsy9f:Dfn<@ Fitness: 0
Tournament 2:
RFoDX<w|r{m Fitness: 2
Z(t1X<w|r{m Fitness: 2
Z(oDXFw|r{m Fitness: 2
Z(t1X<w|r{m Fitness: 2
RFoDX<w|r{m Fitness: 2
Tournament 3:
o^f(T'zQC3Q| Fitness: 0
($g[j@ Asi Fitness: 0
@gb#E`!QGUb Fitness: 0
($g[j@ AsQ| Fitness: 0
o^f(T&zQC3i Fitness: 0
Hello,world! Reference
RFoDX<w|r{m Gen: 4 Fitness: 2
......
......
---------------- Generation 2342 ----------------
Tournament 0:
Hpllk,world! Fitness: 10
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Tournament 1:
Hello,worlF! Fitness: 11
Hello,worlf! Fitness: 11
Hello,worlf! Fitness: 11
Hello,worlf! Fitness: 11
Hello,worlf! Fitness: 11
Tournament 2:
Hello,worldh Fitness: 11
Hello,worldJ Fitness: 11
Hello,worldh Fitness: 11
Hello,worldh Fitness: 11
Hello,worVdJ Fitness: 10
Tournament 3:
He:lo,w4rld! Fitness: 10
He:lo,w0rld! Fitness: 10
He:lo,w4rld! Fitness: 10
He:lo,w0rld! Fitness: 10
He:2o,w4rld! Fitness: 9
Hello,world! Reference
Hpllo,world! Gen: 2342 Fitness: 11
---------------- Generation 2343 ----------------
Tournament 0:
Hp|lo,world! Fitness: 10
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Tournament 1:
Hello,worlF! Fitness: 11
Hello,wo&lf! Fitness: 10
Hello,worlf! Fitness: 11
Hello,worlF! Fitness: 11
Hello,worlf! Fitness: 11
Tournament 2:
Hello,worldh Fitness: 11
Hello,worldJ Fitness: 11
Hello,worldh Fitness: 11
Hello,wor&dh Fitness: 10
Hello,worldJ Fitness: 11
Tournament 3:
He:lo,w4rld! Fitness: 10
He:lo,w0rld! Fitness: 10
He:lo,w4rld! Fitness: 10
He:lo,w0rld! Fitness: 10
He:lozw4rld! Fitness: 9
Hello,world! Reference
Hpllo,world! Gen: 2343 Fitness: 11
---------------- Generation 2344 ----------------
Tournament 0:
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
H\llo,world! Fitness: 11
Hpllo,world! Fitness: 11
Hpllo,world! Fitness: 11
Tournament 1:
Hello,worlF! Fitness: 11
Hello,worlf! Fitness: 11
Hello,world! Fitness: 12
Hello,worlF! Fitness: 11
Hello,wo&lf! Fitness: 10
Tournament 2:
Hello,world Fitness: 11
Hello,worldJ Fitness: 11
Hello,worldh Fitness: 11
Hello,worldh Fitness: 11
Hello,worldJ Fitness: 11
Tournament 3:
He:lo,w44ld! Fitness: 9
He:lo,w0rld! Fitness: 10
He:lo,w4rld! Fitness: 10
He:lo,w0rld! Fitness: 10
He:lo,w4rld! Fitness: 10
Hello,world! Reference
Hello,world! Gen: 2344 Fitness: 12
-----------------------------------------------------------------
References:
[1]: Daniel Ashlock, Evolutionary Computation for Modeling and Optimization, Springer, 2005.
[2]: FORREST, S., WEIMER, W., NGUYEN, T., AND GOUES, C. L., A Genetic Programming Approach to Automated Software Repair, In GECCO (July 2009).