软件工程个人项目--词频统计

姓名:鞠凡     班级:0411202       学号:2012211565

一.题目

     题目:分析一个文本文件中各个单词出现的频率,并把频率最高的10个单词打印出来。文本文件大小约为30KB~300KB。

二.程序结果

三.VS性能分析

1.程序总计用时约4秒,CPU占用峰值约为5%,效率尚可。


2.热路径

可见main函数占比最高


3.执行单个工作最多的函数


4.可以查看每个函数的情况


5.函数调用的情况



四.源代码

#include "stdafx.h"
#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<string.h>
#include <fcntl.h> 
#include <io.h>
/* Size in bytes of every word buffer (tree.a, WN.a, ko, and wd in main). */
#define N 20
/* Element count used for the text buffer in main and for the wonu table. */
#define M 100000
/* Slot count that initstack records in sqstack.stacksize. */
#define STACK_INIT_SIZE 100
/* Node of the binary search tree built by createtree; one node per distinct word. */
typedef struct ntree
{
	char a[N];            /* the word text, held in an N-byte buffer */
	int i;                /* occurrence count; set to 1 on creation, incremented on each repeat */
	struct ntree *left;   /* words for which jfcmp(word, a, N) is negative */
	struct ntree *right;  /* words for which jfcmp(word, a, N) is positive */
}tree;
/* One row of the flat result table wonu: a word and its count, copied from a tree node by traverse. */
typedef struct WordNumber
{
	char a[N];                 /* copy of the word text */
	int i;                     /* copy of the occurrence count */
	struct WordNumber *left;   /* not referenced by name in any code visible in this file */
	struct WordNumber *right;  /* not referenced by name in any code visible in this file */
}WN;
/* Array-backed stack of tree-node pointers, used by traverse. */
typedef struct {
	tree **base;     /* start of the slot array allocated by initstack */
	tree **top;      /* next free slot; equal to base when the stack is empty */
	int stacksize;   /* slot count; initstack sets it to STACK_INIT_SIZE */
}sqstack;
/*
 * sign: number of rows traverse has stored in wonu (also the next free index).
 * sum:  running total of all word counts, accumulated by traverse.
 * n:    display mode read in main; 0 makes traverse print every word,
 *       1 makes main call sort afterwards.
 */
int sign = 0, sum = 0, n;
/* N zero bytes, used as the source when clearing word buffers. */
char ko[N];
/* Result table filled by traverse and reordered by sort. */
WN wonu[M];
/* In-order walk of the tree that fills wonu, sign and sum. */
int traverse(tree *);
/* Stack primitives used by traverse. */
int initstack(sqstack *S);
tree *push(sqstack *s, tree *p);
tree *pop(sqstack *s);
/* Inserts the given word and then the remaining words of the text into the tree. */
int createtree(tree *, char *, char *, long, long);
/* Three-way comparison of two word buffers. */
int jfcmp(char *, char *, int);
/* Orders wonu by count and prints the most frequent words. */
int sort();
/*
 * Read the whole stream into a heap buffer, keeping only printable
 * characters. Every whitespace character (newline, tab, ...) is stored as a
 * single space so that words on adjacent lines stay separate, and one extra
 * space is appended so that the last word is always followed by a delimiter.
 * The buffer grows on demand, so the input size is not limited to M bytes.
 * Returns the buffer (caller frees it) and stores its length in *len, or
 * returns NULL when memory runs out.
 */
static char *main_load_text(FILE *fp, long *len)
{
	long cap = M, used = 0;
	char *buf = (char *)malloc((size_t)cap + 1);
	int ch;   /* int, not char, so that EOF is distinguishable from data */

	if (buf == NULL)return NULL;
	while ((ch = fgetc(fp)) != EOF)
	{
		if (isspace(ch))ch = ' ';
		else if (!isprint(ch))continue;
		if (used == cap)
		{
			char *bigger;

			cap *= 2;
			bigger = (char *)realloc(buf, (size_t)cap + 1);
			if (bigger == NULL)
			{
				free(buf);
				return NULL;
			}
			buf = bigger;
		}
		buf[used++] = (char)ch;
	}
	buf[used++] = ' ';   /* trailing delimiter; the "+ 1" above reserves its slot */
	*len = used;
	return buf;
}

/*
 * Collect the letters of the next space-delimited word of t[*fg .. at) into
 * the N-byte buffer wd. Non-letter, non-space characters are skipped without
 * ending the word. At most N - 1 letters are stored so wd stays terminated;
 * further letters of an over-long word are dropped. On return *fg indexes
 * the space that ended the word, or equals at. wd is left empty when no
 * letters remain.
 */
static void main_scan_word(char *wd, const char *t, long *fg, long at)
{
	int len = 0;

	memset(wd, 0, N);
	while (*fg < at)
	{
		unsigned char c = (unsigned char)t[*fg];

		if (isalpha(c))
		{
			if (len < N - 1)wd[len++] = (char)c;
		}
		else if (c == ' ' && len > 0)break;
		(*fg)++;
	}
}

/*
 * Entry point: loads f:\word.txt, asks for the display mode, builds the word
 * tree and prints the statistics.
 *
 * The first word becomes the root node, the second word is handed to
 * createtree together with the scan position so that it can insert the rest.
 * Mode 0 lists every word in dictionary order (done inside traverse), mode 1
 * additionally prints the most frequent words via sort. Invalid mode input
 * is treated as 0. Always returns 0.
 */
int main()
{
	tree root;
	FILE *fp;
	long at = 0, fg = 0;
	char *t;
	char wd[N];

	printf("                       准备扫描文章\n");
	fp = fopen("f:\\word.txt", "rt");
	if (fp == NULL)
	{
		printf("文件不存在\n");
		return 0;
	}
	t = main_load_text(fp, &at);
	fclose(fp);
	if (t == NULL)exit(-1);
	memset(ko, 0, N);
	printf("          按字典顺序查看单词统计结果按0\n");
	printf("          按单词出现频率顺序查看统计结果按1\n          输入数字:");
	if (scanf("%d", &n) != 1)n = 0;

	main_scan_word(wd, t, &fg, at);
	if (wd[0] == '\0')
	{
		/* The text holds no letters at all: nothing to build or traverse. */
		printf("在此文章中出现的单词数目是%d\n", sum);
		free(t);
		return 0;
	}
	memset(root.a, 0, N);
	strcpy(root.a, wd);
	root.i = 1;
	root.left = NULL;
	root.right = NULL;

	main_scan_word(wd, t, &fg, at);
	/* Only hand over a real second word; an empty one would be stored as a word. */
	if (wd[0] != '\0')createtree(&root, wd, t, fg, at);
	traverse(&root);
	if (n == 1)sort();
	printf("在此文章中出现的单词数目是%d\n", sum);
	free(t);
	return 0;
}
//创建一棵二叉查找树
int createtree(tree *r, char *wd, char *t, long fg, long at)
{
	tree *p, *q;
	int i, j;
	while (1)
	{
		p = r;
		while (p != NULL)
		{
			j = jfcmp(wd, p->a, N);
			if (j<0)
			{
				q = p;
				p = p->left;
				if (p == NULL)
				{
					p = (tree *)malloc(sizeof(tree));
					strncpy(p->a, ko, N);
					strncpy(p->a, wd, N);
					p->i = 1;
					p->left = NULL;
					p->right = NULL;
					q->left = p;
					break;
				}
			}
			if (j>0)
			{
				q = p;
				p = p->right;
				if (p == NULL)
				{
					p = (tree *)malloc(sizeof(tree));
					strncpy(p->a, ko, N);
					strncpy(p->a, wd, N);
					p->i = 1;
					p->left = NULL;
					p->right = NULL;
					q->right = p;
					break;
				}
			}
			if (j == 0)
			{
				p->i++;
				break;
			}
		}
		i = 0;
		strncpy(wd, ko, N);
		while (fg<at)
		{
			if (isalpha(t[fg]))
			{
				wd[i] = t[fg];
				i++;
			}
			if (t[fg] == 32 && i>0)break;
			fg++;
			if (fg >= at)return 0;
		}
	}
	return 0;
}
/*
 * Compare two words in dictionary (byte) order.
 *
 * Looks at no more than n bytes of a and b and stops at the first difference
 * or at a string terminator. Returns -1 when a sorts before b, 1 when a
 * sorts after b, and 0 when the compared parts are equal. A non-positive n
 * compares nothing and yields 0.
 */
int jfcmp(char *a, char *b, int n)
{
	int diff;

	if (n <= 0)return 0;
	diff = strncmp(a, b, (size_t)n);
	/* strncmp only promises the sign; callers expect exactly -1, 0 or 1 */
	return (diff > 0) - (diff < 0);
}
/*
 * In-order (dictionary order) walk of the tree rooted at r, done iteratively
 * with an explicit stack.
 *
 * For every node visited:
 *   - when the global mode n is 0, print its index, word and count;
 *   - add its count to the global sum;
 *   - copy word and count into wonu[sign] and advance sign (rows beyond the
 *     M-row capacity of wonu are not stored).
 *
 * The tree itself is left unmodified. A NULL root visits nothing.
 * Always returns 0.
 */
int traverse(tree *r)
{
	tree *p = r;
	sqstack l;

	initstack(&l);
	/* Done only when there is no current node and nothing left to go back to. */
	while (p != NULL || l.top != l.base)
	{
		/* Walk down the left spine, remembering each node for later. */
		while (p != NULL)
		{
			push(&l, p);
			p = p->left;
		}
		p = pop(&l);
		if (n == 0)
		{
			printf("%-6d", sign);
			printf("%-20s次数-->", p->a);
			printf("%-6d\n", p->i);
		}
		sum += p->i;
		if (sign < M)
		{
			strncpy(wonu[sign].a, p->a, N);   /* strncpy zero-pads the rest */
			wonu[sign].i = p->i;
			sign++;
		}
		p = p->right;
	}
	free(l.base);
	return 0;
}
/*
 * Prepare an empty stack: allocate the slot array, point both base and top
 * at its start and record STACK_INIT_SIZE as the slot count.
 * Terminates the program with exit(-1) when the allocation fails.
 * Always returns 0.
 *
 * NOTE(review): the buffer is sized in units of sizeof(tree), not
 * sizeof(tree *), so it is larger than STACK_INIT_SIZE pointer slots need.
 */
int initstack(sqstack *S)
{
	tree **slots = (tree **)malloc(STACK_INIT_SIZE*sizeof(tree));

	if (slots == NULL)exit(-1);
	S->base = slots;
	S->top = slots;
	S->stacksize = STACK_INIT_SIZE;
	return 0;
}
/*
 * Push node pointer p onto stack s.
 *
 * When all s->stacksize slots are in use the slot array is doubled first, so
 * the stack can follow a tree of any depth. Terminates the program with
 * exit(-1) when the array cannot be enlarged.
 * Always returns a null pointer; callers do not use the result.
 */
tree *push(sqstack *s, tree *p)
{
	int used = (int)(s->top - s->base);

	if (used >= s->stacksize)
	{
		int bigger = (s->stacksize > 0) ? s->stacksize * 2 : 1;
		tree **grown = (tree **)realloc(s->base, (size_t)bigger * sizeof *grown);

		if (grown == NULL)exit(-1);
		/* realloc may move the array, so top must be re-derived from base */
		s->base = grown;
		s->top = grown + used;
		s->stacksize = bigger;
	}
	*(s->top) = p;
	s->top++;
	return 0;
}
/*
 * Remove and return the most recently pushed node pointer.
 * Returns a null pointer (and leaves the stack untouched) when it is empty.
 */
tree *pop(sqstack *s)
{
	if (s->top != s->base)
	{
		s->top--;
		return *(s->top);
	}
	return 0;
}
int sort()
{
	int i, j, temp;
	char s[20];
	for (i = 0; i<sign - 1; i++)
	for (j = 0; j<sign - i - 1; j++)
	{
		if (wonu[j].i<wonu[j + 1].i)
		{
			strncpy(s, wonu[i].a, N);
			strncpy(wonu[j].a, wonu[j + 1].a, N);
			strncpy(wonu[j + 1].a, s, N);
			temp = wonu[j].i;
			wonu[j].i = wonu[j + 1].i;
			wonu[j + 1].i = temp;
		}
	}
	for (i = 0; i<11; i++)
	{
		printf("%-20s次数-->%d\n", wonu[i].a, wonu[i].i);
	}
	return 0;
}

五、心得体会

 通过完成这次个人项目,自己对VS的熟练程度又上了一个档次。刚开始做性能分析时只是应付任务,以至于分析结果的进程列表中出现了“搜狗拼音”都没有发现。被老师指出后,才惭愧地重新进行分析,并仔细查看分析结果,这样才有助于代码的不断优化。


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值