问题描述:
在当前目录中存在文件名为“case14.in”的文本文件,其内容为一篇英文文章(以EOF作为结束标志)。现要求读取该文本文件内容,统计文章中每个单词出现的次数,并输出出现次数最多的前5个单词及其出现次数(按出现次数由多到少的顺序输出,次数相同时按字典顺序输出,不足5个单词时,按顺序输出全部单词)。程序中注意如下细节:
(1)空格、标点符号与回车符起到分隔单词的作用。
(2)文章一行的末尾可能有连字符,出现连字符时,该行最末的字符串与下行最先出现的字符串构成一个单词。
(3)名词缩写算一个单词;
(4)数字不算单词;
(5)单词不区分大小写;
(6)输出时单词全部使用小写。
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One entry of the word table: a word's text paired with its tally. */
struct WORDCOUNT
{
    char word[20];   /* characters of the word (20-byte field) */
    int  count;      /* occurrence counter kept for this word */
};
int main()
{
FILE *fp;
struct WORDCOUNT wordcount[10000];
char essay[200000], ch, temp[20];
int i, j, k, len, count, t;
fp = fopen("case14.in", "r");
for(i=0; (ch = fgetc(fp)) != EOF; i++)
essay[i] = ch;
len = i;
//全部字符写到essay数组中
for(i=0; i<len; i++)
{
if(essay[i]=='-')
{
for(j=i+1; j<len; j++)
essay[j-1] = essay[j];
len--;
}
else if(essay[i]>='A' && essay[i]<='Z')
essay[i] +=32;
else if(essay[i]>='a' && essay[i]<='z')
essay[i]= essay[i];
else essay[i] = ' ';
}
//删除多余空格
for(i=1; i<len; i++)
{
if(essay[i-1]==' ' && essay[i]==' ')
{
for(j=i+1; j<len; j++)
essay[j-1] = essay[j];
len--;
}
}
//分词
k=0;
for(i=0; i<10000, k<len; i++)
{
for(j=0; j<20 && essay[k]!=' ' && k<len; j++, k++)
wordcount[i].word[j] = essay[k];
k++;
}
count = i;
//赋值
for(i=0; i<count; i++)
wordcount[i].count = 1;
//计算单词数
for(i=0; i<count; i++)
{
for(j=i+1; j<count; j++)
{
if(strcmp(wordcount[i].word, wordcount[j].word)==0)
{
wordcount[i].count += wordcount[j].count;
for(k=j+1; k<count; k++)
strcpy(wordcount[k-1].word, wordcount[k].word);
count--;
}
}
}
//按单词排序
for(i=0; i<count-1; i++)
{
for(j=1; j<count-i; j++)
{
if(strcmp(wordcount[j-1].word, wordcount[j].word) > 0)
{
strcpy(temp, wordcount[j-1].word);
strcpy(wordcount[j-1].word, wordcount[j].word);
strcpy(wordcount[j].word, temp);
t = wordcount[j-1].count;
wordcount[j-1].count = wordcount[j].count;
wordcount[j].count = t;
}
}
}
//按数量排序
for(i=0; i<count-1; i++)
{
for(j=1; j<count-i; j++)
{
if(wordcount[j-1].count < wordcount[j].count)
{
strcpy(temp, wordcount[j-1].word);
strcpy(wordcount[j-1].word, wordcount[j].word);
strcpy(wordcount[j].word, temp);
t = wordcount[j-1].count;
wordcount[j-1].count = wordcount[j].count;
wordcount[j].count = t;
}
}
}
if(count<5)
for(i=0; i<count; i++)
printf("%s %d\n", wordcount[i].word, wordcount[i].count);
else
for(i=0; i<5; i++)
printf("%s %d\n", wordcount[i].word, wordcount[i].count);
}