题目:任给一个英文资料的文本文件,要求编程完成以下功能,统计该文本文件的所有单词
(每个单词不包括空格及跨行,单词由字母序列构成且不区分大小写),
并按字典顺序排列存储在一个文件里,统计每个单词在文本文件中出现的总次数、
频度(该单词出现的总次数 / 所有单词出现次数之和)、在文本文件中首次出现的行号及位置。
// A highlighted block
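/* Given a text file of English material, collect statistics on every word in it.
 * A word is a sequence of letters; it contains no spaces, never spans lines,
 * and is compared case-insensitively.
 * The words are sorted in dictionary order and written to a file together with:
 *   - the total number of times the word occurs,
 *   - its frequency (occurrences of the word / occurrences of all words),
 *   - the line number and position of its first occurrence in the text.
 * Words are kept in an array of structs; each element records the word,
 * its row, its column and its occurrence count.
 * Remember to end the last line of the input text with a newline. */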
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size in bytes of a single word buffer (the `word` field of struct yingwen).
#define MAX 50
// Number of entries in the table of distinct words.
#define WMAX 200
// Number of bytes of input text the program is sized for.
#define WENMAX 9999
// One entry of the word table: a distinct word plus the statistics that
// main() records for it and later writes to the result file.
struct yingwen
{
// The word's characters (buffer of MAX bytes).
char word[MAX];
int row; // line number stored when the word is added to the table
int col; // column stored when the word is added to the table
int times; // occurrence count
int y;// index of the word within its line (1st, 2nd, ... word of that line)
};
int main(void)
{
struct yingwen danci[WMAX];
FILE *fp;
char ch; //单个字母的读取
char filename[100];
int ac;
char wenben[WENMAX];
char curword[MAX]; //读取当前单词
int jishu = 0; //统计文本中的总字符
int number = 0; //用于计算单个单词的长度
int now = 0; //当前所在wenben的位置
int lie = 0; //用于记录列的位置
int houxu = 0; //用于记录第一行之外的列数
double suoyou = 0; //记录所有单词出现的次数
int m = 1;
int n = 1;
int k = 1;
int s = 0;
int xz=1;
printf("请输入需要打开文件的文件名:\n");
scanf("%s", filename);
if ((fp = fopen(filename, "r")) == NULL)
{
printf("文件不存在!");
return 0;
}
fp = fopen(filename, "r");
for (int i = 0; (ac = getc(fp)) != EOF; i++)
{
if ((ac >= 65) && (ac <= 90)) //大写转小写
ac = ac + 32;
wenben[i] = (char)ac;
wenben[i + 1] = '\0';
jishu++;
}
int j = 0;
for (int c = now; wenben[c] != ' '; c++) //第一个单词和后续单词分开进入结构数组
{
curword[c] = wenben[c];
number++;
now++;
}
for (int f = 0; f < number; f++)
{
danci[j].word[f] = curword[f];
}
danci[j].row = m;
danci[j].col = n;
danci[j].times = k;
danci[j].y=xz;
n = n + number;
while (wenben[now] == '\n')
{
m++;
n = 0;
k++;
now++;
}
if (wenben[now] == ' ')
now = now + 1;
j++;
number = 0;
suoyou++;
lie = now;
xz++;
while (danci[s].word != NULL && jishu != now) //读取单词同时进入结构数组
{
for (j; j < WMAX && jishu != now; j++) //外循环按单词看
{
suoyou++;
number = 0;
for (int c = now; wenben[c] != ' ' && wenben[c] != '\n' && wenben[c] != '\0'; c++) //统计单个单词及长度
{
curword[number] = wenben[c];
curword[number + 1] = '\0';
number++;
now++;
if (m == 1)
lie = now;
}
int e;
for (e = 0; danci[e].word != NULL && danci[e].times != NULL; e++)
{
if (strcmp(danci[e].word, curword) == 0)
{
danci[e].times++;
if (strcmp(danci[e].word, curword) == 0 && wenben[now] == '\n')
{
m++;
n = 0;
now = now + 1;
xz=1;
houxu = now;
--j;
}
else if (strcmp(danci[e].word, curword) == 0 && wenben[now] == ' ')
{
now = now + 1;
xz++;
if (m == 1)
lie = now;
--j;
break;
}
break;
}
}
while (strcmp(danci[e].word, curword) != 0 && jishu != now)
{
for (int f = 0; f < number && curword[f] != '\0'; f++)
{
danci[j].word[f] = curword[f];
}
danci[j].row = m;
if (m != 1)
lie = now - houxu;
n = lie - number + 1;
danci[j].col = n;
danci[j].times = k;
danci[j].y=xz;
while (wenben[now] == '\n')
{
m++;
n = 0;
now = now + 1;
houxu = now;
xz=1;
}
if (wenben[now] == ' ')
{
now = now + 1;
xz++;
if (m == 1)
lie = now;
break;
}
else
{
break;
}
}
}
s++;
}
struct yingwen linshi[1]; //临时的结构数组用于交换值
int q = 0; //q用于统计结构数组的项数
for (int g = 0; danci[g].word != NULL && danci[g].times != NULL; g++)
q++;
int r = 0;
for (int t = 0; t < q; t++) //冒泡排序
{
r++;
for (int g = 0; g < (q - r); g++)
{
if (strcmp(danci[g].word, danci[g + 1].word) > 0)
{
for (int z = 0; danci[g].word[z] != '\0'; z++)
{
linshi[0].word[z] = danci[g].word[z];
linshi[0].word[z + 1] = '\0';
}
linshi[0].row = danci[g].row;
linshi[0].col = danci[g].col;
linshi[0].times = danci[g].times;
linshi[0].y=danci[g].y;
for (int z = 0; danci[g + 1].word[z] != '\0'; z++)
{
danci[g].word[z] = danci[g + 1].word[z];
danci[g].word[z + 1] = '\0';
}
danci[g].row = danci[g + 1].row;
danci[g].col = danci[g + 1].col;
danci[g].times = danci[g + 1].times;
danci[g].y=danci[g+1].y;
for (int z = 0; linshi[0].word[z] != '\0'; z++)
{
danci[g + 1].word[z] = linshi[0].word[z];
danci[g + 1].word[z + 1] = '\0';
}
danci[g + 1].row = linshi[0].row;
danci[g + 1].col = linshi[0].col;
danci[g + 1].times = linshi[0].times;
danci[g+1].y=linshi[0].y;
}
}
}
FILE *fk;
fk = fopen("dancijieguo.txt", "w");
fprintf(fk, "每个单词在文本中出现的总次数,频度,在文本文件中首次出现的行号及位置为:\n");
for (int i = 0; danci[i].word != NULL && danci[i].times != NULL; i++)
{
for (int j = 0; danci[i].word[j] != '\0'; j++)
fprintf(fk, "%c", danci[i].word[j]);
fprintf(fk, " 次数:%d 频度:%.3f 行号:%d 位置:%d行 %d列 第%d个单词\n", danci[i].times, danci[i].times / suoyou, danci[i].row, danci[i].row, danci[i].col,danci[i].y);
}
fclose(fk);
return 0;
}