/**
 * Word-frequency statistics for a passage of English text.
 * @Author xxingup@gmail.com
 * date 2010/06/17
 */
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/**
 * One entry of the word-frequency table: a word and how many times it
 * has been counted.
 *
 * A default-constructed entry has empty text and a count of zero.
 * CalcCount treats an entry whose text is empty as an unused slot.
 */
struct Word
{
    /** Creates an unused entry (empty text, count 0). */
    Word() : Str(), Count(0)
    {}

    std::string Str;  // the word text
    int Count;        // number of occurrences recorded for Str

    /**
     * Swaps the contents of this entry with another entry (used while sorting).
     * @param word the entry to swap with; afterwards it holds this entry's
     *             previous text and count, and this entry holds word's
     */
    void exchange(Word &word)
    {
        // Swap the string buffers instead of copying both strings through a
        // temporary: no allocation, no character copying.
        Str.swap(word.Str);

        const int otherCount = word.Count;
        word.Count = Count;
        Count = otherCount;
    }
};
- /**
- * 统计词频
- * @param words 单词数组
- * @param newWord 单词内容
- * @param size 单词总数
- */
- void CalcCount(Word * words, string &newWord, int size)
- {
- int i = 0;
- for(; i < size; i++)
- {
- if(words[i].Str == newWord)
- {
- words[i].Count++;
- return;
- }
- else if(words[i].Str == "")
- break;
- }
- words[i].Str = newWord;
- words[i].Count = 1;
- }
- /**
- * 以单词出现频率降序排列单词
- * @param words 单词数组
- * @param size 单词数量
- */
- void SortWordDown(Word * words, int size)
- {
- for(int i = 0; i < size; i++)
- {
- for(int j = 0; j < size-1; j++)
- {
- if(words[j].Count < words[j+1].Count)
- {
- words[j].exchange(words[j+1]);
- }
- }
- }
- }
- int main()
- {
- Word * words;
- string content;
- cout << "输入一段英文:";
- getline(cin, content);
- //计算单词总数
- int wCount = 1;
- for(unsigned int i = 0; i < content.length(); i++)
- {
- if(content[i] == ' ')
- wCount++;
- }
- words = new Word[wCount];
- string::size_type offset = content.find(' ');//单词都是以空格隔开
- while(offset != string::npos)
- {
- string wStr = content.substr(0, offset);
- content.erase(0, offset+1);
- CalcCount(words, wStr, wCount);
- offset = content.find(' ');
- }
- CalcCount(words, content, wCount);//计算最后一个单词
- SortWordDown(words, wCount);
- int printCount = wCount < 5 ? wCount : 5;
- cout << "出现频率最高的前" << printCount << "个单词是:" << endl;
- for(int i = 0; i < printCount; i++)
- {
- cout << words[i].Str << "/t频率:" << words[i].Count << "次" << endl;
- }
- delete [] words;
- return 0;
- }
效果: