统计文档中的单词出现的次数
给定一个 .txt 文件,统计文件长度(字节数)、文件中的单词总数以及每个单词出现的次数,并按出现次数降序输出。
思路:
定义结构体存放单词和次数
读取文件
求文件长度
判断是否为单词并将单词存入结构体
统计每个单词出现次数
排序
代码如下:
#include<stdio.h>
#include<assert.h>
#include<string.h>
#include<malloc.h>
#include<cstdlib>
// Number of Word entries allocated for Z.brr in main().
#define MAX 500000
// One entry of the word table.
struct Word
{
    char arr[40];   // NUL-terminated text of the word
    int count;      // occurrence tally; set to 0 once the entry has been
                    // merged into an earlier duplicate or already printed
};
typedef struct Word Word;
// The word table: a buffer of entries plus the number of entries in use.
struct Num
{
    Word *brr;      // entry buffer (allocated in main)
    int length;     // number of words stored by Getword
};
typedef struct Num N;

// The single, program-wide word table.
N Z;
// Return the length in bytes of the file named p.
// Returns -1 when p is NULL, the file cannot be opened or measured, or the
// length does not fit in an int.
int Size(char *p)
{
    if (p == NULL)
    {
        return -1;
    }

    // Binary mode: the end offset of a binary stream is a plain byte count.
    FILE *fp = fopen(p, "rb");
    if (fp == NULL)
    {
        return -1;
    }

    long len = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        len = ftell(fp);    // ftell itself reports failure as -1L
    }
    fclose(fp);

    // Reject lengths that would be mangled by the narrowing to int.
    if (len < 0 || (long)(int)len != len)
    {
        return -1;
    }
    return (int)len;
}
// Read the whole text file named p into a freshly allocated, NUL-terminated
// buffer and return it. The caller owns the buffer and must free() it.
// A file that cannot be opened trips the assert (as before); NULL is returned
// when the file cannot be measured or memory cannot be allocated.
char* Read(char *p)
{
    FILE *fr = fopen(p, "r");
    assert(fr != NULL);
    if (fr == NULL)     // still safe when asserts are compiled out
    {
        return NULL;
    }

    // Measure on the stream we are about to read instead of opening the file
    // a second time; the value is used only as an upper bound for the buffer.
    long size = -1;
    if (fseek(fr, 0, SEEK_END) == 0)
    {
        size = ftell(fr);
    }
    if (size < 0)
    {
        fclose(fr);
        return NULL;
    }
    rewind(fr);

    char *str = (char *)malloc((size_t)size + 1);
    if (str == NULL)
    {
        fclose(fr);
        return NULL;
    }

    // A text-mode stream may deliver fewer characters than the byte length
    // (e.g. CR/LF translation), so terminate at what was actually read rather
    // than padding the tail with EOF values.
    size_t got = fread(str, 1, (size_t)size, fr);
    str[got] = '\0';

    fclose(fr);
    return str;
}
// Report whether p may appear inside a word: an ASCII letter, an apostrophe
// or a hyphen.
bool Is_Alpha(char p)
{
    const bool lower = (p >= 'a' && p <= 'z');
    const bool upper = (p >= 'A' && p <= 'Z');
    const bool joiner = (p == '\'' || p == '-');    // ASCII 39 and 45

    return lower || upper || joiner;
}
// Split str into words (maximal runs of characters accepted by Is_Alpha),
// store each one in Z.brr with count 1, set Z.length to the number of words
// found, print that total, and then call Count to merge duplicates.
//
// Words longer than the arr field can hold are truncated to fit; the rest of
// the run is skipped, so two long words sharing that prefix count as one.
// A NULL str (or an unallocated Z.brr) is treated as empty input.
//
// NOTE(review): the number of words is not checked against the capacity of
// Z.brr (allocated by the caller); input with more words than that capacity
// still overflows the table.
void Getword(char *str)
{
    void Count(Word *end);  // defined later in this file

    Z.length = 0;
    if (str == NULL || Z.brr == NULL)
    {
        printf("总单词数为 %d\n", Z.length);
        return;
    }
    Z.brr->count = 0;

    Word *slot = Z.brr;                         // next free entry
    const int room = (int)sizeof(slot->arr) - 1; // characters that fit before the NUL
    char *head = str;

    while (*head != '\0')
    {
        if (!Is_Alpha(*head))
        {
            head++;         // separator: skip it
            continue;
        }

        // Consume one whole word. The scan stops ON the first non-word
        // character (possibly the terminator) and lets the outer loop look at
        // it, so the cursor never steps past the end of the string.
        int i = 0;
        while (Is_Alpha(*head))
        {
            if (i < room)
            {
                slot->arr[i++] = *head;
            }
            head++;
        }
        slot->arr[i] = '\0';
        slot->count = 1;
        slot++;
        Z.length++;
    }

    printf("总单词数为 %d\n", Z.length);

    // Count expects a pointer to the last stored word; with no words there is
    // nothing to merge and no valid "last" entry to point at.
    if (Z.length > 0)
    {
        Count(slot - 1);
    }
}
// Merge duplicate words in Z.brr[0 .. end], where end points to the LAST
// stored word (inclusive). For every distinct word, the first occurrence ends
// up holding the total number of occurrences and each later occurrence has
// its count set to 0. Comparison is exact (case-sensitive) via strcmp.
void Count(Word *end)
{
    for (Word *first = Z.brr; first <= end; first++)
    {
        if (first->count == 0)
        {
            continue;   // already folded into an earlier entry
        }

        // The scan must include *end itself; stopping one short would leave
        // the final word of the text out of every tally.
        for (Word *other = first + 1; other <= end; other++)
        {
            if (other->count != 0 && strcmp(first->arr, other->arr) == 0)
            {
                first->count++;
                other->count = 0;
            }
        }
    }
}
// Print every word in Z.brr[0 .. len-1] that has a non-zero count, one
// "word count" line each, in descending order of count. Words with equal
// counts are printed in the order they are stored.
//
// The table is consumed while printing: each printed entry has its count
// reset to 0 so the next pass picks the next-largest one.
void Myprint(int len)
{
    for (;;)
    {
        int best = -1;      // index of the largest remaining count, if any
        int best_count = 0;

        for (int j = 0; j < len; j++)
        {
            if (Z.brr[j].count > best_count)
            {
                best_count = Z.brr[j].count;
                best = j;
            }
        }

        if (best < 0)
        {
            break;          // nothing left to print (also covers len <= 0)
        }

        printf("%s ", Z.brr[best].arr);
        printf("%d\n", best_count);
        Z.brr[best].count = 0;
    }
}
// Entry point: allocate the word table, report the length of the input file,
// load it, count its words, and print the frequency list.
// Returns 0 on success and 1 when memory or the file contents are unavailable.
int main()
{
    // A writable array rather than a pointer to a string literal, because the
    // helpers take a non-const char *.
    char src[] = "E:\\1.txt";

    Z.brr = (Word*)malloc(MAX*sizeof(Word));
    if (Z.brr == NULL)
    {
        fprintf(stderr, "out of memory: cannot allocate the word table\n");
        return 1;
    }

    printf("文件总长度为:%d\n",Size(src));

    // Keep the buffer pointer so it can be released once the words are stored.
    char *text = Read(src);
    if (text == NULL)
    {
        fprintf(stderr, "cannot read %s\n", src);
        free(Z.brr);
        Z.brr = NULL;
        return 1;
    }

    Getword(text);
    Myprint(Z.length);

    free(text);
    free(Z.brr);
    Z.brr = NULL;

    system("pause");
    return 0;
}