姓名:鞠凡 班级:0411202 学号:2012211565
一.题目
题目:分析一个文本文件中各个词出现的频率,并且把频率最高的10个单词打印出来,文本文件大小大约30kb---300kb大小。
二.程序结果
三.VS性能分析
1.程序总计用时大概在4秒左右,峰值占用CPU5%,还算比较有效率。
2.热路径
可见main函数占比最高
3.执行单个工作最多的函数
4.可以查看每个函数的情况
5.函数调用的情况
四.源代码
#include "stdafx.h"
#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<string.h>
#include <fcntl.h>
#include <io.h>
#define N 20
#define M 100000
#define STACK_INIT_SIZE 100
typedef struct ntree
{
char a[N];
int i;
struct ntree *left;
struct ntree *right;
}tree;
typedef struct WordNumber
{
char a[N];
int i;
struct WordNumber *left;
struct WordNumber *right;
}WN;
typedef struct {
tree **base;
tree **top;
int stacksize;
}sqstack;
int sign = 0, sum = 0, n;
char ko[N];
WN wonu[M];
int traverse(tree *);
int initstack(sqstack *S);
tree *push(sqstack *s, tree *p);
tree *pop(sqstack *s);
int createtree(tree *, char *, char *, long, long);
int jfcmp(char *, char *, int);
int sort();
//主函数
int main()
{
tree root;
FILE *fp;
long at, fg;
char ch;
int i;
char t[M], wd[N];
printf(" 准备扫描文章\n");
fp = fopen("f:\\word.txt", "rt");
if (fp == NULL)
{
printf("文件不存在\n");
return 0;
}
at = 0;
do
{
/* read a char from the file */
ch = fgetc(fp);
if (isprint(ch)){ t[at] = ch, at++; }
} while (ch != EOF);
fclose(fp);
for (i = 0; i<N; i++)ko[i] = 0;
printf(" 按字典顺序查看单词统计结果按0\n");
printf(" 按单词出现频率顺序查看统计结果按1\n 输入数字:");
scanf("%d", &n);
fg = 0;
i = 0;
strncpy(wd, ko, N);
while (fg<at)
{
if (isalpha(t[fg]))
{
wd[i] = t[fg];
i++;
}
if (t[fg] == 32 && i>0)break;
fg++;
}
strncpy(root.a, ko, N);
strcpy(root.a, wd);
root.i = 1;
root.left = NULL;
root.right = NULL;
i = 0;
strncpy(wd, ko, N);
while (fg<at)
{
if (isalpha(t[fg]))
{
wd[i] = t[fg];
i++;
}
if (t[fg] == 32 && i>0)break;
fg++;
}
createtree(&root, wd, t, fg, at);
traverse(&root);
if (n == 1)sort();
printf("在此文章中出现的单词数目是%d\n", sum);
return 0;
}
//创建一棵二叉查找树
int createtree(tree *r, char *wd, char *t, long fg, long at)
{
tree *p, *q;
int i, j;
while (1)
{
p = r;
while (p != NULL)
{
j = jfcmp(wd, p->a, N);
if (j<0)
{
q = p;
p = p->left;
if (p == NULL)
{
p = (tree *)malloc(sizeof(tree));
strncpy(p->a, ko, N);
strncpy(p->a, wd, N);
p->i = 1;
p->left = NULL;
p->right = NULL;
q->left = p;
break;
}
}
if (j>0)
{
q = p;
p = p->right;
if (p == NULL)
{
p = (tree *)malloc(sizeof(tree));
strncpy(p->a, ko, N);
strncpy(p->a, wd, N);
p->i = 1;
p->left = NULL;
p->right = NULL;
q->right = p;
break;
}
}
if (j == 0)
{
p->i++;
break;
}
}
i = 0;
strncpy(wd, ko, N);
while (fg<at)
{
if (isalpha(t[fg]))
{
wd[i] = t[fg];
i++;
}
if (t[fg] == 32 && i>0)break;
fg++;
if (fg >= at)return 0;
}
}
return 0;
}
//比较两个字符串的大小(字典中)
int jfcmp(char *a, char *b, int n)
{
int i;
for (i = 0; i<N; i++)
{
if (a[i] == b[i])i++;
if (a[i]<b[i])return -1;
if (a[i]>b[i])return 1;
}
return 0;
}
//中序遍历一棵二叉树,非递归实现。
int traverse(tree *r)
{
tree *p, *q;
sqstack l;
initstack(&l);
p = r;
push(&l, p);
while (p == r || l.base != l.top)
{
if (p->left != NULL)
{
push(&l, p->left);
q = p;
p = p->left;
q->left = NULL;
}
else
{
p = pop(&l);
if (n == 0)
{
printf("%-6d", sign);
printf("%-20s次数-->", p->a);
printf("%-6d\n", p->i);
}
sum += p->i;
strncpy(wonu[sign].a, ko, N);
strncpy(wonu[sign].a, p->a, N);
wonu[sign].i = p->i;
sign++;
if (p->right != NULL)
{
push(&l, p->right);
q = p;
p = p->right;
q->right = NULL;
}
}
}
return 0;
}
int initstack(sqstack *S)
{
S->base = (tree **)malloc(STACK_INIT_SIZE*sizeof(tree));
if (!S->base)exit(-1);
S->top = S->base;
S->stacksize = STACK_INIT_SIZE;
return 0;
}
tree *push(sqstack *s, tree *p)
{
*(s->top) = p;
s->top++;
return 0;
}
tree *pop(sqstack *s)
{
tree *p;
if (s->top == s->base)return 0;
else p = *(--s->top);
return p;
}
int sort()
{
int i, j, temp;
char s[20];
for (i = 0; i<sign - 1; i++)
for (j = 0; j<sign - i - 1; j++)
{
if (wonu[j].i<wonu[j + 1].i)
{
strncpy(s, wonu[i].a, N);
strncpy(wonu[j].a, wonu[j + 1].a, N);
strncpy(wonu[j + 1].a, s, N);
temp = wonu[j].i;
wonu[j].i = wonu[j + 1].i;
wonu[j + 1].i = temp;
}
}
for (i = 0; i<11; i++)
{
printf("%-20s次数-->%d\n", wonu[i].a, wonu[i].i);
}
return 0;
}
五、心得体会
通过这次个人项目的完成,自己对VS的熟练程度又上了一个档次,特别是刚开始完成性能分析只是应付任务,以至于进程中出现了“搜狗拼音”都不知道。被老师指出后才惭愧得下来重新分析,并仔细去看分析的结果,这样才有助于代码的优化再优化。