姓名:张明熙
班级:0411202
学号:2012211735
一.题目
题目:分析一个文本文件中各个词出现的频率,并且把频率最高的10个单词打印出来,文本文件大小大约30kb---300kb大小。
四.心得体会
这次软件工程作业让我学习到了很多东西,包括对文本中词语的分析,C语言编程的知识等。
班级:0411202
学号:2012211735
一.题目
题目:分析一个文本文件中各个词出现的频率,并且把频率最高的10个单词打印出来,文本文件大小大约30kb---300kb大小。
二.程序结果
三.源代码
#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<string.h>
#include <fcntl.h>
#define N 20
#define M 100000
#define STACK_INIT_SIZE 100
typedef struct ntree
{
char a[N];
int i;
struct ntree *left;
struct ntree *right;
}tree;
typedef struct WordNumber
{
char a[N];
int i;
struct WordNumber *left;
struct WordNumber *right;
}WN;
typedef struct {
tree **base;
tree **top;
int stacksize;
}sqstack;
int sign = 0, sum = 0, n;
char ko[N];
WN wonu[M];
int traverse(tree *);
int initstack(sqstack *S);
tree *push(sqstack *s, tree *p);
tree *pop(sqstack *s);
int createtree(tree *, char *, char *, long, long);
int jfcmp(char *, char *, int);
int sort();
int main()
{
tree root;
FILE *fp;
long at, fg;
char ch;
int i;
char t[M], wd[N];
printf(" ready to scan file\n");
fp = fopen("/Users/me/Downloads/oliver.txt", "rt");
if (fp == NULL)
{
printf("file does not exist\n");
return 0;
}
at = 0;
do
{
/* read a char from the file */
ch = fgetc(fp);
if (isprint(ch)){ t[at] = ch, at++; }
} while (ch != EOF);
fclose(fp);
for (i = 0; i<N; i++)ko[i] = 0;
//printf(" enter 0 to view result of words summary ordered by distionary\n");
printf(" enter 1 to view result of words summary ordered by frequency\n ");
scanf("%d", &n);
fg = 0;
i = 0;
strncpy(wd, ko, N);
while (fg<at)
{
if (isalpha(t[fg]))
{
wd[i] = t[fg];
i++;
}
if (t[fg] == 32 && i>0)break;
fg++;
}
strncpy(root.a, ko, N);
strcpy(root.a, wd);
root.i = 1;
root.left = NULL;
root.right = NULL;
i = 0;
strncpy(wd, ko, N);
while (fg<at)
{
if (isalpha(t[fg]))
{
wd[i] = t[fg];
i++;
}
if (t[fg] == 32 && i>0)break;
fg++;
}
createtree(&root, wd, t, fg, at);
traverse(&root);
if (n == 1)sort();
printf("number of words in the article%d\n", sum);
return 0;
}
//创建一棵二叉查找树
int createtree(tree *r, char *wd, char *t, long fg, long at)
{
tree *p, *q;
int i, j;
while (1)
{
p = r;
while (p != NULL)
{
j = jfcmp(wd, p->a, N);
if (j<0)
{
q = p;
p = p->left;
if (p == NULL)
{
p = (tree *)malloc(sizeof(tree));
strncpy(p->a, ko, N);
strncpy(p->a, wd, N);
p->i = 1;
p->left = NULL;
p->right = NULL;
q->left = p;
break;
}
}
if (j>0)
{
q = p;
p = p->right;
if (p == NULL)
{
p = (tree *)malloc(sizeof(tree));
strncpy(p->a, ko, N);
strncpy(p->a, wd, N);
p->i = 1;
p->left = NULL;
p->right = NULL;
q->right = p;
break;
}
}
if (j == 0)
{
p->i++;
break;
}
}
i = 0;
strncpy(wd, ko, N);
while (fg<at)
{
if (isalpha(t[fg]))
{
wd[i] = t[fg];
i++;
}
if (t[fg] == 32 && i>0)break;
fg++;
if (fg >= at)return 0;
}
}
return 0;
}
//比较两个字符串的大小(字典中)
int jfcmp(char *a, char *b, int n)
{
int i;
for (i = 0; i<N; i++)
{
if (a[i] == b[i])i++;
if (a[i]<b[i])return -1;
if (a[i]>b[i])return 1;
}
return 0;
}
//中序遍历一棵二叉树,非递归实现。
int traverse(tree *r)
{
tree *p, *q;
sqstack l;
initstack(&l);
p = r;
push(&l, p);
while (p == r || l.base != l.top)
{
if (p->left != NULL)
{
push(&l, p->left);
q = p;
p = p->left;
q->left = NULL;
}
else
{
p = pop(&l);
if (n == 0)
{
printf("%-6d", sign);
printf("%-20stimes-->", p->a);
printf("%-6d\n", p->i);
}
sum += p->i;
strncpy(wonu[sign].a, ko, N);
strncpy(wonu[sign].a, p->a, N);
wonu[sign].i = p->i;
sign++;
if (p->right != NULL)
{
push(&l, p->right);
q = p;
p = p->right;
q->right = NULL;
}
}
}
return 0;
}
int initstack(sqstack *S)
{
S->base = (tree **)malloc(STACK_INIT_SIZE*sizeof(tree));
if (!S->base)exit(-1);
S->top = S->base;
S->stacksize = STACK_INIT_SIZE;
return 0;
}
tree *push(sqstack *s, tree *p)
{
*(s->top) = p;
s->top++;
return 0;
}
tree *pop(sqstack *s)
{
tree *p;
if (s->top == s->base)return 0;
else p = *(--s->top);
return p;
}
int sort()
{
int i, j, temp;
char s[20];
for (i = 0; i<sign - 1; i++)
for (j = 0; j<sign - i - 1; j++)
{
if (wonu[j].i<wonu[j + 1].i)
{
strncpy(s, wonu[i].a, N);
strncpy(wonu[j].a, wonu[j + 1].a, N);
strncpy(wonu[j + 1].a, s, N);
temp = wonu[j].i;
wonu[j].i = wonu[j + 1].i;
wonu[j + 1].i = temp;
}
}
for (i = 0; i<11; i++)
{
printf("%-20stimes-->%d\n", wonu[i].a, wonu[i].i);
}
return 0;
}
四.心得体会
这次软件工程作业让我学习到了很多东西,包括对文本中词语的分析,C语言编程的知识等。