基于二叉排序树的低频词过滤系统

一、实验内容

  1. 对于一篇给定的英文文章,利用线性表和二叉排序树来实现单词频率的统计,实现低频词的过滤,并比较两种方法的效率。具体要求如下:
  2. 读取英文文章文件(inFile.txt),识别其中的单词。
  3. 分别利用线性表和二叉排序树构建单词的存储结构。当识别出一个单词后,若线性表或者二叉排序树中没有该单词,则在适当的位置上添加该单词;若该单词已经被识别,则增加其出现的频率。
  4. 统计结束后,删除出现频率低于五次的单词,并显示该单词和其出现频率。
  5. 其余单词及其出现频率按照从高到低的次序输出到文件中(outFile.txt),同时输出用两种方法完成该工作所用的时间。
  6. 计算查找表的ASL值,分析比较两种方法的效率。
  7. 系统运行后主菜单如下:

当选择1后进入以下界面:  

其中选择2时显示利用线性表来实现所有功能所用的时间。

当在主菜单选择2二叉排序树后,进入的界面与上图类同。

二、设计思路

一)数据结构:

线性表:
continue_to_finish1();连续执行至完毕
show_time1();显示执行时间
single_distinguish_count1();单步执行:识别并统计单词
single_delete1();单步执行:删除并显示出现频率低的单词
single_output1();单步执行:输出其余单词及其频率
single_ASL1();单步执行:计算并输出ASL
二叉排序树:
continue_to_finish2();连续执行至完毕
show_time2();显示执行时间
single_distinguish_count2();单步执行:识别并统计单词
single_delete2();单步执行:删除并显示出现频率低的单词
single_output2();单步执行:输出其余单词及其频率
single_ASL2();单步执行:计算并输出ASL

 二)源码

LinkList.h: 

#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<malloc.h>
#include<string.h>
#include <time.h>
#include<iostream>

using namespace std;

// Node of the singly linked word list: one word together with its counter.
struct node1
{
	char data[20];       // word text, stored as a C string
	int count;           // counter attached to the word
	struct node1* next;  // following node, NULL at the end of the list
};
typedef struct node1 LNode;      // node type
typedef struct node1* LinkList;  // pointer to a node; also used as the list handle

// Exchanges the payload (word and count) of two list nodes.
// The nodes themselves keep their position; only their contents trade places.
void swap(LinkList x, LinkList y)
{
	// Stash x's payload.
	const int savedCount = x->count;
	char savedWord[20];
	strcpy_s(savedWord, x->data);

	// x takes over y's payload ...
	x->count = y->count;
	strcpy_s(x->data, y->data);

	// ... and y receives what x held before.
	y->count = savedCount;
	strcpy_s(y->data, savedWord);
}

// Sorts the list by count, largest first.
// Works on payloads only: for every position it pulls the largest remaining
// count forward by swapping node contents, so no links are rewired.
// L is a list with a header node; the header itself is not part of the sort.
void sort_slist(LinkList& L)
{
	for (LinkList anchor = L->next; anchor != NULL; anchor = anchor->next)
	{
		LinkList probe = anchor->next;
		while (probe != NULL)
		{
			if (anchor->count >= probe->count)
			{
				probe = probe->next;
				continue;
			}
			// After the swap the same probe node is examined once more; it now
			// holds the smaller count, so the next pass simply steps past it.
			swap(anchor, probe);
		}
	}
}

// Creates an empty list: allocates the header node and stores it in L.
// Only the header's next pointer is set; its data and count are left untouched.
void InitList(LinkList& L)
{
	LinkList head = (LinkList)malloc(sizeof(LNode));
	head->next = NULL;
	L = head;
}

// Records one occurrence of word `a` in list L.
// If the word is already stored its counter is incremented; otherwise a new
// node with count 1 is inserted directly behind the header node.
void InsertList(LinkList& L, char* a)
{
	// Look for a node that already holds this word.
	for (LinkList cur = L->next; cur != NULL; cur = cur->next)
	{
		if (strcmp(a, cur->data) == 0)
		{
			cur->count++;
			return;
		}
	}

	// First occurrence: link a fresh node in at the front.
	LinkList fresh = (LinkList)malloc(sizeof(LNode));
	strcpy_s(fresh->data, a);
	fresh->count = 1;
	fresh->next = L->next;
	L->next = fresh;
}

// Prints a heading followed by one "word  count" line per node of L to the console.
void LNodeprint(LinkList& L)
{
	cout << "单词       个数统计" << endl;
	for (LinkList cur = L->next; cur != NULL; cur = cur->next)
		printf("%-10s    %d\n", cur->data, cur->count);
}

// Appends a heading line and every node of L as "word (count)" to outFile.txt.
// The file is opened in append mode, so earlier content is kept.
// If the file cannot be opened an error is reported and nothing is written
// (previously a failed open led to fprintf on a null FILE*).
void fprint1(LinkList& L)
{
	FILE* out = NULL;
	const errno_t err = fopen_s(&out, "outFile.txt", "a+");
	if (err != 0 || out == NULL)
	{
		cerr << "无法打开文件 outFile.txt" << endl;
		return;
	}

	fprintf(out, "单链表删除个数<5的单词:\n");
	for (LinkList P = L->next; P != NULL; P = P->next)
		fprintf(out, "%s (%d)\t", P->data, P->count);

	fclose(out);
}

void single_distinguish_count1()
{
	FILE* in;
	char a[20], c;
	LinkList L;
	InitList(L);
	errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
	while (!feof(in))//直到碰见文件结束符结束循环
	{
		int i = 0;
		memset(a, 0, sizeof(a));
		while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
			a[i++] = c;
		if (a[0])
			InsertList(L, a);
	}
	sort_slist(L);
	LNodeprint(L);
	fclose(in);//关闭文件	
}

//删除节点
void deletenode(LinkList& L)
{
	LinkList P, Q;
	FILE* out;
	errno_t err = fopen_s(&out, "outFile.txt", "w+");//建立输出文件
	P = L->next;
	while (P && P->count >= 5)
		P = P->next;
	while (P)
	{
		Q = P;
		P = P->next;
		printf("删除节点: %-10s  %d\n", Q->data, Q->count);
		free(Q);
	}
}

void single_delete1()
{
	FILE* in;
	int j = 1;
	char a[20], c;
	LinkList L;
	InitList(L);
	errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
	while (!feof(in))//直到碰见文件结束符结束循环
	{
		int i = 0;
		memset(a, 0, sizeof(a));
		while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
			a[i++] = c;
		if (a[0])
			InsertList(L, a);
	}
	sort_slist(L);
	cout << "删除低频词汇" << endl;
	deletenode(L);
}

// Prints the leading run of nodes with count >= 5 to the console and writes
// the same entries as "word(count)" to outFile.txt (the file is overwritten).
// Output stops at the first node whose count is below 5, so L is expected to
// be sorted by descending count.
//
// The output file is now closed (and therefore flushed) before success is
// reported; a failed open is reported instead of being written through.
void outnode(LinkList& L)
{
	FILE* out = NULL;
	if (fopen_s(&out, "outFile.txt", "w+") != 0 || out == NULL)
	{
		cerr << "无法打开文件 outFile.txt" << endl;
		return;
	}

	cout << "删除低频率单词后" << endl << "单词       个数统计" << endl;
	for (LinkList P = L->next; P != NULL && P->count >= 5; P = P->next)
	{
		printf("%-10s  %d\n", P->data, P->count);
		fprintf(out, "%s(%d)\t", P->data, P->count);
	}

	fclose(out);
	cout << "写入文件outFile.txt成功" << endl;
}

void single_output1()
{
	FILE* in;
	int j = 1;
	char a[20], c;
	LinkList L;
	InitList(L);
	errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
	while (!feof(in))//直到碰见文件结束符结束循环
	{
		int i = 0;
		memset(a, 0, sizeof(a));
		while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
			a[i++] = c;
		if (a[0])
			InsertList(L, a);
	}
	sort_slist(L);
	outnode(L);
	fclose(in);//关闭文件
}

// Returns the number of nodes in L, not counting the header node.
int sumNode(LinkList& L)
{
	int total = 0;
	for (LinkList cur = L->next; cur != NULL; cur = cur->next)
		++total;
	return total;
}

void single_ASL1()
{
	FILE* in;
	int sum;
	char a[20], c;
	LinkList L;
	InitList(L);
	errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
	while (!feof(in))//直到碰见文件结束符结束循环
	{
		int i = 0;
		memset(a, 0, sizeof(a));
		while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
			a[i++] = c;
		if (a[0])
			InsertList(L, a);
	}
	sum = sumNode(L);
	cout << "单词总个数:" << sum << endl;
	fclose(in);//关闭文件
	cout << "ASL = " << double(3 * (sum + 1) / 4.0) << endl;
}

// Runs all four linked-list steps back to back: count, delete, output, ASL.
// Each step opens inFile.txt and builds its own list, so the input is read
// once per step.
void continue_to_finish1()
{
	single_distinguish_count1();
	single_delete1();
	single_output1();
	single_ASL1();
}

void show_time1()
{
	double star, finish;
	star = (double)clock();//获取当前时间
	single_distinguish_count1();
	single_delete1();
	single_output1();
	single_ASL1();
	finish = (double)clock();//获取结束时间
	cout << "执行时间:" << (finish - star) << endl;//得到的是运行for语句所用的时间,时间单位了毫秒	
}

 BiTree.h:

#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<malloc.h>
#include<string.h>
#include <time.h>
#include<iostream>

using namespace std;

// Node of the binary sort tree: one word together with its counter.
struct node2
{
	char data[20];        // word text, stored as a C string
	int count;            // counter attached to the word
	struct node2* left;   // left subtree
	struct node2* right;  // right subtree
};
typedef struct node2 BSTNode;  // node type
typedef struct node2* BSTree;  // pointer to a node; also used as the tree handle

// Shared state of the tree steps:
// T is the tree built by single_distinguish_count2(),
// nT is the filtered copy built by single_delete2().
BSTree T = NULL;
BSTree nT = NULL;

// Fixed-capacity stack of tree nodes, used by fprint2() for its
// non-recursive in-order traversal.
struct stack
{
	BSTree data[1000];  // stored nodes
	int top;            // index of the top element; fprint2() uses -1 for "empty"
};
typedef struct stack seqstack;

// Records one occurrence of word `a` in the binary sort tree rooted at T.
// An existing word gets its counter incremented; a new word becomes a leaf
// with count 1 at the position dictated by strcmp ordering.
void insertNode(BSTree& T, char* a)
{
	// Walk down to the link (root or child pointer) where the word belongs.
	BSTree* slot = &T;
	while (*slot != NULL)
	{
		const int order = strcmp(a, (*slot)->data);
		if (order == 0)
		{
			(*slot)->count++;
			return;
		}
		slot = (order < 0) ? &(*slot)->left : &(*slot)->right;
	}

	// Empty link reached: hang a new leaf there.
	BSTree leaf = (BSTree)malloc(sizeof(BSTNode));
	*slot = leaf;
	strcpy_s(leaf->data, a);
	leaf->left = NULL;
	leaf->right = NULL;
	leaf->count = 1;
}

// Prints the tree in in-order (i.e. in strcmp order of the words), one
// "word   count" line per node.
// Each node is printed exactly once; the former second print via cout repeated
// every entry without a separator and relied on setw/setiosflags, for which
// no <iomanip> include exists in this header.
void printTree(BSTree T)
{
	if (T)
	{
		printTree(T->left);
		printf("%-10s   %d\n", T->data, T->count);
		printTree(T->right);
	}
}

// Appends a heading line and every node of tree T, in in-order, as
// "word (count)" to outFile.txt. Uses an explicit stack instead of recursion.
//
// A failed open is reported instead of being written through, and the
// traversal stops with a message if the tree is deeper than the stack can
// hold rather than writing past the end of the stack array.
void fprint2(BSTree T)
{
	FILE* out = NULL;
	if (fopen_s(&out, "outFile.txt", "a+") != 0 || out == NULL)
	{
		cerr << "无法打开文件 outFile.txt" << endl;
		return;
	}

	seqstack S;
	S.top = -1;
	const int capacity = (int)(sizeof(S.data) / sizeof(S.data[0]));

	fprintf(out, "排序二叉树删除个数 < 5 的单词:\n");
	while (T || S.top != -1)
	{
		// Descend along left children, remembering the path.
		while (T)
		{
			if (S.top + 1 >= capacity)
			{
				cerr << "二叉排序树过深,超出遍历栈容量,输出中止" << endl;
				fclose(out);
				return;
			}
			S.data[++S.top] = T;
			T = T->left;
		}

		// The stack cannot be empty here: either a node was just pushed or the
		// outer condition held because the stack was non-empty.
		T = S.data[S.top--];
		fprintf(out, "%s (%d)\t", T->data, T->count);
		T = T->right;
	}
	fclose(out);
}

void single_distinguish_count2()
{
	FILE* in;
	T = NULL;
	errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
	char a[20], c;
	while (!feof(in))//直到碰见文件结束符结束循环
	{
		int i = 0;
		memset(a, 0, sizeof(a));
		while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
			a[i++] = c;
		if (a[0])
			insertNode(T, a);
	}
	cout << "中序遍历二叉排序树" << endl;
	cout << "单词       个数统计" << endl;
	printTree(T);
}

// Inserts a copy of node T (word and count, no children) into the binary sort
// tree rooted at nT and returns nT.
// Words that compare less than a node go left; everything else, including an
// equal word, goes right.
BSTree insertTree(BSTree& nT, BSTree T)
{
	// Walk down to the empty link where the copy belongs.
	BSTree* slot = &nT;
	while (*slot != NULL)
		slot = (strcmp(T->data, (*slot)->data) < 0) ? &(*slot)->left : &(*slot)->right;

	BSTree copy = (BSTree)malloc(sizeof(BSTNode));
	*slot = copy;
	strcpy_s(copy->data, T->data);
	copy->count = T->count;
	copy->left = NULL;
	copy->right = NULL;
	return nT;
}

// Visits tree T in in-order and copies every node whose count is at least 5
// into the global tree nT.
void newBSTree(BSTree T)
{
	// The left subtree is handled recursively, the right one by moving on in the loop.
	for (BSTree node = T; node != NULL; node = node->right)
	{
		newBSTree(node->left);
		if (node->count >= 5)
			insertTree(nT, node);
	}
}

// Step "delete low-frequency words" for the binary sort tree.
// Builds the global tree nT from the global tree T, keeping only nodes with
// count >= 5 (see newBSTree), and prints nT in in-order.
// T is whatever single_distinguish_count2() left behind; it is not rebuilt here.
void single_delete2()
{
	cout << "删除<5的二叉排序树中序遍历:" << endl;
	nT = NULL;  // start from an empty tree; a previously built nT is not freed
	newBSTree(T);
	cout << "单词       个数统计" << endl;
	printTree(nT);
}

// Step "output remaining words" for the binary sort tree.
// Prints the global filtered tree nT to the console and appends it to
// outFile.txt through fprint2(). nT is the tree built by single_delete2().
void single_output2()
{
	cout << "单词       个数统计" << endl;
	printTree(nT);
	// NOTE: the success message is printed before fprint2() actually writes the file.
	cout << "写入文件outFile.txt成功" << endl;
	fprint2(nT);
}

// Accumulates the figures needed for the average search length of tree T.
//   s : receives the sum of the depths of all nodes (the root counts as depth i + 1)
//   j : receives the number of nodes
//   i : depth of the parent of T (0 for the root)
// Always returns 1.
int calculateASL(BSTree T, int* s, int* j, int i)
{
	if (T == NULL)
		return 1;

	const int depth = i + 1;
	*s += depth;
	calculateASL(T->left, s, j, depth);
	++*j;
	calculateASL(T->right, s, j, depth);
	return 1;
}

// Step "compute ASL" for the binary sort tree.
// Prints the sum of node depths and the node count of the global tree T as
// "s / j", followed by the quotient itself when the tree is not empty.
// (Previously only the unevaluated fraction was shown.)
void single_ASL2()
{
	int s = 0, j = 0;
	calculateASL(T, &s, &j, 0);

	cout << "ASL = " << s << " / " << j;
	if (j > 0)  // empty tree: no quotient to show
		cout << " = " << (double)s / j;
	cout << endl;
}

// Runs all four binary-sort-tree steps back to back: count, delete, output, ASL.
// The steps communicate through the globals T and nT, so the order matters:
// the first step builds T, the second derives nT from it.
void continue_to_finish2()
{
	single_distinguish_count2();
	single_delete2();
	single_output2();
	single_ASL2();
}

void show_time2()
{
	double star, finish;
	star = (double)clock();//获取当前时间
	single_distinguish_count2();
	single_delete2();
	single_output2();
	single_ASL2();
	finish = (double)clock();//获取结束时间
	cout << "执行时间:" << (finish - star) << endl;//得到的是运行for语句所用的时间,时间单位了毫秒	
}

UI.h:

#pragma once
#include<iostream>
#include"BiTree.h"
#include"LinkList.h"
using namespace std;

// Prints the seven-entry step menu shared by the list and tree screens,
// followed by the input prompt (the prompt is not terminated by a newline).
void print_choose()
{
	static const char* const kEntries[] = {
		"1.连续执行至完毕",
		"2.显示执行时间",
		"3.单步执行:识别并统计单词",
		"4.单步执行:删除并显示出现频率低的单词",
		"5.单步执行:输出其余单词及其频率",
		"6.单步执行:计算并输出ASL",
		"7.返回主菜单",
	};
	const int entryCount = (int)(sizeof(kEntries) / sizeof(kEntries[0]));

	for (int k = 0; k < entryCount; ++k)
		cout << kEntries[k] << endl;
	cout << "请输入你的选择:";
}

int mainmenu();// forward declaration: Link() and Tree() below call mainmenu(), which is defined after them

// Linked-list screen: shows the step menu, runs the chosen step and shows the
// menu again until "7" (back to main menu) is chosen, at which point the
// function returns to its caller.
//
// The menu is driven by a loop rather than by Link() calling itself after
// every step, so the call stack no longer grows with each selection.
// Non-numeric input is discarded and asked for again; if the input stream is
// exhausted or broken the program exits instead of spinning forever.
void Link()
{
	for (;;)
	{
		cout << "线性表选择菜单" << endl << endl;
		print_choose();

		int n = 0;
		for (;;)
		{
			if (cin >> n)
			{
				if (n >= 1 && n <= 7)
					break;
			}
			else
			{
				if (cin.eof() || cin.bad())
					exit(0);  // nothing more can ever be read
				// Drop the offending line so the next read starts fresh.
				cin.clear();
				char skipped;
				while (cin.get(skipped) && skipped != '\n')
				{
				}
			}
			cout << "输入有误,请重新输入" << endl;
		}

		system("cls");
		switch (n)
		{
		case 1:
			continue_to_finish1();
			break;
		case 2:
			show_time1();
			break;
		case 3:
			single_distinguish_count1();
			break;
		case 4:
			single_delete1();
			break;
		case 5:
			single_output1();
			break;
		case 6:
			single_ASL1();
			break;
		case 7:
			return;  // the caller (mainmenu) displays the main menu again
		}
	}
}

// Binary-sort-tree screen: shows the step menu, runs the chosen step and shows
// the menu again until "7" (back to main menu) is chosen, at which point the
// function returns to its caller.
//
// The menu is driven by a loop rather than by Tree() calling itself after
// every step, so the call stack no longer grows with each selection.
// Non-numeric input is discarded and asked for again; if the input stream is
// exhausted or broken the program exits instead of spinning forever.
void Tree()
{
	for (;;)
	{
		cout << "二叉排序树选择菜单" << endl << endl;
		print_choose();

		int n = 0;
		for (;;)
		{
			if (cin >> n)
			{
				if (n >= 1 && n <= 7)
					break;
			}
			else
			{
				if (cin.eof() || cin.bad())
					exit(0);  // nothing more can ever be read
				// Drop the offending line so the next read starts fresh.
				cin.clear();
				char skipped;
				while (cin.get(skipped) && skipped != '\n')
				{
				}
			}
			cout << "输入有误,请重新输入" << endl;
		}

		system("cls");
		switch (n)
		{
		case 1:
			continue_to_finish2();
			break;
		case 2:
			show_time2();
			break;
		case 3:
			single_distinguish_count2();
			break;
		case 4:
			single_delete2();
			break;
		case 5:
			single_output2();
			break;
		case 6:
			single_ASL2();
			break;
		case 7:
			return;  // the caller (mainmenu) displays the main menu again
		}
	}
}

// Main menu: lets the user pick the linked-list screen, the binary-sort-tree
// screen, or exit. After a screen returns the main menu is shown again.
//
// The function does not return normally: option 3 (and an exhausted or broken
// input stream) ends the process through exit(0). The menu is a loop rather
// than self-recursion, so the stack no longer grows with each selection and
// control never runs off the end of this int function.
// Non-numeric input is discarded and asked for again.
int mainmenu()
{
	for (;;)
	{
		cout << "1.线性表" << endl;
		cout << "2.二叉排序树" << endl;
		cout << "3.退出系统" << endl;
		cout << "请选择你需要的服务,输入数字(1-3):";

		int n = 0;
		for (;;)
		{
			if (cin >> n)
			{
				if (n >= 1 && n <= 3)
					break;
			}
			else
			{
				if (cin.eof() || cin.bad())
					exit(0);  // nothing more can ever be read
				// Drop the offending line so the next read starts fresh.
				cin.clear();
				char skipped;
				while (cin.get(skipped) && skipped != '\n')
				{
				}
			}
			cout << "输入有误,请重新输入" << endl;
		}

		system("cls");
		switch (n)
		{
		case 1:
			Link();
			break;
		case 2:
			Tree();
			break;
		case 3:
			cout << "退出系统" << endl;
			exit(0);
		}
	}
}

LF_filter.cpp:

#include"UI.h"

// Program entry point: hands control to the main menu.
int main()
{
	mainmenu();  // mainmenu() ends the process itself via exit(0) when "3" is chosen
	return 0;
}

 

 

 

  • 0
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值