SoundEx 是一种拼音算法,用于按英语发音来索引姓名,它最初由美国人口调查局开发。 SoundEx 方法返回一个表示姓名的四字符代码,由一个英文
字母后跟三个数字构成。 字母是姓名的首字母,数字对姓名中剩余的辅音字母编码。 发音相近的姓名具有相同的 SoundEx 代码。
static private string SoundEx(string word)
{
// The length of the returned code.
int length = 4;
// Value to return.
string value = "";
// The size of the word to process.
int size = word.Length;
// The word must be at least two characters in length.
if (size > 1)
{
// Convert the word to uppercase characters.
word = word.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
// Convert the word to a character array.
char[] chars = word.ToCharArray();
// Buffer to hold the character codes.
StringBuilder buffer = new StringBuilder();
buffer.Length = 0;
// The current and previous character codes.
int prevCode = 0;
int currCode = 0;
// Add the first character to the buffer.
buffer.Append(chars[0]);
// Loop through all the characters and convert them to the proper character code.
for (int i = 1; i < size; i++)
{
switch (chars[i])
{
case 'A':
case 'E':
case 'I':
case 'O':
case 'U':
case 'H':
case 'W':
case 'Y':
currCode = 0;
break;
case 'B':
case 'F':
case 'P':
case 'V':
currCode = 1;
break;
case 'C':
case 'G':
case 'J':
case 'K':
case 'Q':
case 'S':
case 'X':
case 'Z':
currCode = 2;
break;
case 'D':
case 'T':
currCode = 3;
break;
case 'L':
currCode = 4;
break;
case 'M':
case 'N':
currCode = 5;
break;
case 'R':
currCode = 6;
break;
}
// Check if the current code is the same as the previous code.
if (currCode != prevCode)
{
// Check to see if the current code is 0 (a vowel); do not process vowels.
if (currCode != 0)
buffer.Append(currCode);
}
// Set the previous character code.
prevCode = currCode;
// If the buffer size meets the length limit, exit the loop.
if (buffer.Length == length)
break;
}
// Pad the buffer, if required.
size = buffer.Length;
if (size < length)
buffer.Append('0', (length - size));
// Set the value to return.
value = buffer.ToString();
}
// Return the value.
return value;
}