To crack a Vigenère cipher whose key length is known to be 3, 4 or 5, I used frequency analysis to find candidate keys. For every candidate I calculated the Euclidean distance between the observed letter frequencies and the “Relative frequencies of letters in the English language”, and compared these distances to determine first the correct key length and then the correct key.
My code follows the steps below:
1. Preparation
// Complete contents of the input file; null until a file has been read into it.
public static string bigfile = null;

// Candidate key lengths to try.
public static int[] keylength = { 3, 4, 5 };

// Name of the file to analyse.
public static string filename = "sample.txt";

// Reference relative frequencies of letters in English (the eight entries listed here),
// used as the baseline that measured frequencies are compared against.
// The table is filled by a collection initializer so that it is populated as part of the
// field declaration; bare Add(...) statements are not valid next to field declarations.
public static Dictionary<char, double> normalfre = new Dictionary<char, double>
{
    { 'E', 0.12072 },
    { 'T', 0.09056 },
    { 'A', 0.08167 },
    { 'O', 0.07507 },
    { 'I', 0.06966 },
    { 'N', 0.06749 },
    { 'S', 0.06327 },
    { 'H', 0.06094 },
};
First, I created a Dictionary called “normalfre” that holds the “Relative frequencies of letters in the English language” found on the internet [http://en.wikipedia.org/wiki/Letter_frequency]. It is the reference against which the frequencies measured in the original text are compared.
The candidate key lengths in “keylength” are {3,4,5}, and “filename” is “sample.txt”.
2. Import file
// Reads the entire text of the given file and stores it in the static bigfile field.
// filename: path of the file to load.
static void readfile(string filename)
{
    string contents = File.ReadAllText(filename);
    bigfile = contents;
}
The function "ReadAllText" is used to read the original text into a string.
3. Find the key length
// Chooses the most plausible key length among the candidates in keylength.
// For each candidate the text in bigfile is split into that many columns; for every column the
// smallest value returned by findKeyDistance is recorded, and the candidate whose columns have
// the smallest average of those values is returned.
// NOTE(review): findKeyDistance is defined elsewhere; presumably it maps each candidate key
// letter to its distance from the reference letter frequencies - confirm.
// Returns: the selected candidate key length.
// Throws: InvalidOperationException when a column contains no characters
// (for example, the text is shorter than the candidate key length).
static int determinkeylength() {
    // candidate key length -> average of the per-column minimum distances
    Dictionary<int, double> avera = new Dictionary<int, double>();

    // Iterate over the candidates themselves instead of assuming exactly three of them
    // that happen to start at 3.
    foreach (int candidate in keylength)
    {
        // One string per key position: column i holds characters i, i + candidate, ...
        List<string> columns = divideByKeylength(candidate, bigfile);
        List<double> distances = new List<double>();

        foreach (string column in columns)
        {
            Dictionary<char, double> frequences = frequencyAnalysis(column);
            if (frequences.Count == 0)
            {
                // An empty column has no frequencies to analyse; fail with the same
                // exception type the earlier Max() call produced in this situation.
                throw new System.InvalidOperationException("Sequence contains no elements");
            }

            // Best (smallest) distance for this key position.
            Dictionary<char, double> kd = findKeyDistance(frequences);
            distances.Add(kd.Values.Min());
        }

        // Key the average by the candidate length itself, not by a position-derived number.
        avera.Add(candidate, distances.Sum() / candidate);
    }

    // Pick the candidate with the smallest average; on an exact tie the last one enumerated wins.
    double minn = avera.Values.Min();
    int finalkeylength = avera.Keys.Where(c => avera[c] == minn).LastOrDefault();
    return finalkeylength;
}
}
4. Divide text by key length
// Splits originalText into keylen interleaved columns: the character at index p goes to
// column p % keylen, keeping its relative order.
// keylen: number of columns to produce.
// originalText: the text to split.
// Returns: a list of keylen strings, one per column.
static List<string> divideByKeylength(int keylen, string originalText) {
    StringBuilder[] columns = new StringBuilder[keylen];
    for (int col = 0; col < columns.Length; col++)
    {
        columns[col] = new StringBuilder();
    }

    // Deal the characters out to the columns in round-robin order.
    int position = 0;
    foreach (char symbol in originalText)
    {
        columns[position % keylen].Append(symbol);
        position++;
    }

    List<string> result = new List<string>();
    foreach (StringBuilder column in columns)
    {
        result.Add(column.ToString());
    }
    return result;
}
5. Frequency analysis for each divided text
// Builds the relative-frequency table of the characters in dividedfile.
// dividedfile: the text to analyse.
// Returns: a dictionary mapping each distinct character to (occurrences / text length);
// an empty dictionary when the text is empty.
static Dictionary<char, double> frequencyAnalysis(string dividedfile) {
    // Count occurrences as integers first. Adding 1/length once per character accumulates
    // floating-point rounding error (ten identical characters would not sum to exactly 1.0).
    Dictionary<char, int> counts = new Dictionary<char, int>();
    foreach (char symbol in dividedfile)
    {
        int seen;
        counts.TryGetValue(symbol, out seen); // seen is 0 when the character is new
        counts[symbol] = seen + 1;
    }

    // Divide each count once by the total length to obtain the relative frequency.
    double filelength = dividedfile.Length;
    Dictionary<char, double> fretable = new Dictionary<char, double>();
    foreach (KeyValuePair<char, int> entry in counts)
    {
        fretable[entry.Key] = entry.Value / filelength;
    }
    return fretable;
}
6. Calculate the Euclidean distance between our result and the normal English letter frequencies
// NOTE(review): this listing appears under the "Euclidean distance" heading but defines
// frequencyAnalysis, the same routine as the listing in section 5. The findKeyDistance routine
// that determinkeylength calls is not shown in the visible part of this document - confirm
// which code was intended here.
//
// Builds the relative-frequency table of the characters in dividedfile.
// dividedfile: the text to analyse.
// Returns: a dictionary mapping each distinct character to (occurrences / text length);
// an empty dictionary when the text is empty.
static Dictionary<char, double> frequencyAnalysis(string dividedfile) {
    // Count occurrences as integers first. Adding 1/length once per character accumulates
    // floating-point rounding error (ten identical characters would not sum to exactly 1.0).
    Dictionary<char, int> counts = new Dictionary<char, int>();
    foreach (char symbol in dividedfile)
    {
        int seen;
        counts.TryGetValue(symbol, out seen); // seen is 0 when the character is new
        counts[symbol] = seen + 1;
    }

    // Divide each count once by the total length to obtain the relative frequency.
    double filelength = dividedfile.Length;
    Dictionary<char, double> fretable = new Dictionary<char, double>();
    foreach (KeyValuePair<char, int> entry in counts)
    {
        fretable[entry.Key] = entry.Value / filelength;
    }
    return fretable;
}
7. The key is the candidate key with the minimum Euclidean distance