* The function below, btlfsa ("better than levenshtein for spelling apps"),
* produces better results when comparing words like haert against
* haart and heart.
*
* For example here is the output of levenshtein compared to btlfsa
* when comparing 'haert' to 'herat, haart, heart, harte'
*
* btlfsa('haert','herat'); output is.. 3
* btlfsa('haert','haart'); output is.. 3
* btlfsa('haert','harte'); output is.. 3
* btlfsa('haert','heart'); output is.. 2
*
* levenshtein('haert','herat'); output is.. 2
* levenshtein('haert','haart'); output is.. 1
* levenshtein('haert','harte'); output is.. 2
* levenshtein('haert','heart'); output is.. 2
*
* In other words, if you used levenshtein, 'haart' would be the
* closest match to 'haert', whereas btlfsa sees that it should be
* 'heart'.
*/

/**
 * Scores how dissimilar two words are; a lower score means a closer match.
 *
 * The score is the sum of three penalties:
 *  - 2 points for every character of one word that does not occur anywhere
 *    in the other word. Letters are matched case-insensitively. When the
 *    set of "known" characters is built from a word, anything other than
 *    A-Z, a-z, 0-9 and the apostrophe is treated as a space.
 *  - 1 point per byte of difference in length.
 *  - 1 point for every position of the longer word whose character differs
 *    from the shorter word's character at that position (case-sensitive),
 *    or that lies beyond the end of the shorter word. A length gap is
 *    therefore counted by both this penalty and the previous one.
 *
 * Lengths and positions are byte based (strlen / string offsets).
 *
 * @param string $word1 First word to compare.
 * @param string $word2 Second word to compare.
 *
 * @return int Dissimilarity score; 0 for identical words.
 */
function btlfsa($word1, $word2)
{
    $word1 = (string) $word1;
    $word2 = (string) $word2;

    // Returns $subject with every character that also occurs in $chars
    // removed, i.e. the characters of $subject that $chars does not have.
    $stripShared = function ($chars, $subject) {
        $class = preg_replace("/[^A-Za-z0-9\']/", ' ', $chars);

        if ($class === null || $class === '') {
            // An empty class would yield the invalid pattern "/[]/i".
            // Nothing is shared with an empty word, so nothing is removed.
            return $subject;
        }

        return (string) preg_replace('/[' . $class . ']/i', '', $subject);
    };

    // For each char that is different add 2 to the score
    // as this is a BIG difference
    $remainder = $stripShared($word1, $word2) . $stripShared($word2, $word1);
    $score     = strlen($remainder) * 2;

    // Take the difference in string length and add it to the score
    $w1_len = strlen($word1);
    $w2_len = strlen($word2);
    $score += abs($w1_len - $w2_len);

    // Calculate how many letters are in different locations
    // and add it to the score, i.e.
    //
    //   h e a r t
    //   1 2 3 4 5
    //
    //   h a e r t     positions 2 and 3 differ (a/e)  = 2
    //   1 2 3 4 5
    //
    $longer     = $w1_len > $w2_len ? $word1 : $word2;
    $shorter    = $w1_len > $w2_len ? $word2 : $word1;
    $longer_len = strlen($longer);

    for ($i = 0; $i < $longer_len; $i++)
    {
        // Positions past the end of the shorter word count as mismatches.
        if (!isset($shorter[$i]) || $longer[$i] !== $shorter[$i])
        {
            $score++;
        }
    }

    return $score;
}

// *************************************************************
// Here is a full code example showing the difference between ranking
// spelling suggestions with levenshtein() and with btlfsa().

$misspelled = 'haert';

// Imagine that these are sample suggestions thrown back by soundex or metaphone..
$suggestions = array('herat', 'haart', 'heart', 'harte');

// Firstly order an array based on levenshtein
$levenshtein_ordered = array();
foreach ($suggestions as $suggestion)
{
    $levenshtein_ordered[$suggestion] = levenshtein($misspelled, $suggestion);
}
// asort keeps the suggestion => score association while sorting by score.
asort($levenshtein_ordered, SORT_NUMERIC);

print "Suggestions as ordered by levenshtein...\n";
print_r($levenshtein_ordered);

// Secondly order an array based on btlfsa
$btlfsa_ordered = array();
foreach ($suggestions as $suggestion)
{
    $btlfsa_ordered[$suggestion] = btlfsa($misspelled, $suggestion);
}
asort($btlfsa_ordered, SORT_NUMERIC);

print "Suggestions as ordered by btlfsa...\n";
print_r($btlfsa_ordered);
print "\n";
?>