一、实验内容
- 对于一篇给定的英文文章,利用线性表和二叉排序树来实现单词频率的统计,实现低频词的过滤,并比较两种方法的效率。具体要求如下:
- 读取英文文章文件(inFile.txt),识别其中的单词。
- 分别利用线性表和二叉排序树构建单词的存储结构。当识别出一个单词后,若线性表或者二叉排序树中没有该单词,则在适当的位置上添加该单词;若该单词已经被识别,则增加其出现的频率。
- 统计结束后,删除出现频率低于五次的单词,并显示该单词和其出现频率。
- 其余单词及其出现频率按照从高到低的次序输出到文件(outFile.txt)中,同时输出用两种方法完成该工作所用的时间。
- 计算查找表的ASL值,分析比较两种方法的效率。
- 系统运行后主菜单如下:
  1.线性表
  2.二叉排序树
  3.退出系统
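当选择1后进入以下界面(线性表选择菜单):
  1.连续执行至完毕
  2.显示执行时间
  3.单步执行:识别并统计单词
  4.单步执行:删除并显示出现频率低的单词
  5.单步执行:输出其余单词及其频率
  6.单步执行:计算并输出ASL
  7.返回主菜单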
其中选择2时显示利用线性表来实现所有功能所用的时间。
当在主菜单选择2二叉排序树后,进入的界面与上图类同。
二、设计思路
一)数据结构:
| 数据结构 | 函数 | 功能 |
| --- | --- | --- |
| 线性表 | continue_to_finish1(); | 连续执行至完毕 |
| 线性表 | show_time1(); | 显示执行时间 |
| 线性表 | single_distinguish_count1(); | 单步执行:识别并统计单词 |
| 线性表 | single_delete1(); | 单步执行:删除并显示出现频率低的单词 |
| 线性表 | single_output1(); | 单步执行:输出其余单词及其频率 |
| 线性表 | single_ASL1(); | 单步执行:计算并输出ASL |
| 二叉排序树 | continue_to_finish2(); | 连续执行至完毕 |
| 二叉排序树 | show_time2(); | 显示执行时间 |
| 二叉排序树 | single_distinguish_count2(); | 单步执行:识别并统计单词 |
| 二叉排序树 | single_delete2(); | 单步执行:删除并显示出现频率低的单词 |
| 二叉排序树 | single_output2(); | 单步执行:输出其余单词及其频率 |
| 二叉排序树 | single_ASL2(); | 单步执行:计算并输出ASL |
二)源码
LinkList.h:
#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<malloc.h>
#include<string.h>
#include <time.h>
#include<iostream>
using namespace std;
// Node of the singly linked word list.
struct node1
{
    char data[20];      // word text, NUL-terminated (room for 19 characters)
    int count;          // how many times the word has been seen
    struct node1* next; // following node, or NULL at the end of the list
};
typedef struct node1 LNode;     // a node
typedef struct node1* LinkList; // pointer to a node / handle of a list
// Exchanges the payload (word text and count) of two list nodes.
// The nodes keep their positions in the list; only their contents move.
void swap(LinkList x, LinkList y)
{
    char savedWord[20];
    const int savedCount = x->count;
    strcpy_s(savedWord, x->data);

    // Overwrite x with y's payload, then restore the saved payload into y.
    strcpy_s(x->data, y->data);
    x->count = y->count;

    strcpy_s(y->data, savedWord);
    y->count = savedCount;
}
// Sorts the list in place by descending word count.
// L is the head (sentinel) node; only node payloads are exchanged, links are
// left untouched.
void sort_slist(LinkList& L)
{
    for (LinkList anchor = L->next; anchor != NULL; anchor = anchor->next)
    {
        LinkList probe = anchor->next;
        while (probe != NULL)
        {
            if (anchor->count >= probe->count)
            {
                probe = probe->next;
                continue;
            }
            // A larger count sits further down: pull it into the anchor slot.
            // probe is deliberately not advanced; it is re-tested (and then
            // skipped) on the next pass of the loop.
            swap(anchor, probe);
        }
    }
}
// Initialises a list: allocates the head (sentinel) node and stores it in L.
// The head carries no word; its fields are still given defined values so that
// it never holds indeterminate data.
// Terminates the program if the allocation fails, because every caller
// dereferences L immediately afterwards.
void InitList(LinkList& L)
{
    L = (LinkList)malloc(sizeof(LNode));
    if (L == NULL)
    {
        fprintf(stderr, "InitList: out of memory\n");
        exit(EXIT_FAILURE);
    }
    L->data[0] = '\0';
    L->count = 0;
    L->next = NULL;
}
// Records one occurrence of the word `a` in list L.
// If the word is already present its counter is incremented; otherwise a new
// node with count 1 is inserted directly after the head node.
// `a` must be a NUL-terminated string that fits the 20-byte data field.
// Terminates the program if a new node cannot be allocated.
void InsertList(LinkList& L, char* a)
{
    for (LinkList cur = L->next; cur != NULL; cur = cur->next)
    {
        if (strcmp(a, cur->data) == 0)
        {
            cur->count++;
            return;
        }
    }

    // Word not seen before: create a node and link it in at the front.
    LinkList fresh = (LinkList)malloc(sizeof(LNode));
    if (fresh == NULL)
    {
        fprintf(stderr, "InsertList: out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy_s(fresh->data, a);
    fresh->count = 1;
    fresh->next = L->next;
    L->next = fresh;
}
// Prints a header line followed by every word of the list with its count,
// one word per line, in list order.
void LNodeprint(LinkList& L)
{
    cout << "单词 个数统计" << endl;
    for (LinkList cur = L->next; cur != NULL; cur = cur->next)
        printf("%-10s %d\n", cur->data, cur->count);
}
// Appends a header line and every word of the list (with its count) to
// outFile.txt. The file is opened in append mode, so earlier content is kept.
// If the file cannot be opened a message is printed and nothing is written.
void fprint1(LinkList& L)
{
    FILE* out = NULL;
    errno_t err = fopen_s(&out, "outFile.txt", "a+");
    if (err != 0 || out == NULL)
    {
        cout << "无法打开文件outFile.txt" << endl;
        return;
    }

    fprintf(out, "单链表删除个数<5的单词:\n");
    for (LinkList cur = L->next; cur != NULL; cur = cur->next)
        fprintf(out, "%s (%d)\t", cur->data, cur->count);

    fclose(out);
}
void single_distinguish_count1()
{
FILE* in;
char a[20], c;
LinkList L;
InitList(L);
errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
while (!feof(in))//直到碰见文件结束符结束循环
{
int i = 0;
memset(a, 0, sizeof(a));
while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
a[i++] = c;
if (a[0])
InsertList(L, a);
}
sort_slist(L);
LNodeprint(L);
fclose(in);//关闭文件
}
// Removes the low-frequency tail of the list and prints each removed word.
// The list is expected to be sorted by descending count (the caller runs
// sort_slist first): everything from the first node with count < 5 onwards is
// printed and freed.
// The last surviving node is unlinked from the freed tail, so the list stays
// safe to traverse afterwards. The earlier revision also opened outFile.txt
// in "w+" mode here without writing to it or closing it, which only truncated
// the file and leaked the handle; that open has been dropped.
void deletenode(LinkList& L)
{
    // Find the last node that is kept (the head itself if nothing is kept).
    LinkList keep = L;
    while (keep->next != NULL && keep->next->count >= 5)
        keep = keep->next;

    LinkList doomed = keep->next;
    keep->next = NULL; // detach the tail before freeing it

    while (doomed != NULL)
    {
        LinkList following = doomed->next;
        printf("删除节点: %-10s %d\n", doomed->data, doomed->count);
        free(doomed);
        doomed = following;
    }
}
void single_delete1()
{
FILE* in;
int j = 1;
char a[20], c;
LinkList L;
InitList(L);
errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
while (!feof(in))//直到碰见文件结束符结束循环
{
int i = 0;
memset(a, 0, sizeof(a));
while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
a[i++] = c;
if (a[0])
InsertList(L, a);
}
sort_slist(L);
cout << "删除低频词汇" << endl;
deletenode(L);
}
// Prints the words with count >= 5 to the console and writes the same words
// to outFile.txt (the file is truncated first).
// The list is expected to be sorted by descending count: output stops at the
// first node whose count is below 5.
// If the file cannot be opened a message is printed and nothing is output.
void outnode(LinkList& L)
{
    FILE* out = NULL;
    errno_t err = fopen_s(&out, "outFile.txt", "w+");
    if (err != 0 || out == NULL)
    {
        cout << "无法打开文件outFile.txt" << endl;
        return;
    }

    cout << "删除低频率单词后" << endl << "单词 个数统计" << endl;
    for (LinkList cur = L->next; cur != NULL && cur->count >= 5; cur = cur->next)
    {
        printf("%-10s %d\n", cur->data, cur->count);
        fprintf(out, "%s(%d)\t", cur->data, cur->count);
    }

    // Close the file so the data is flushed before success is reported.
    fclose(out);
    cout << "写入文件outFile.txt成功" << endl;
}
void single_output1()
{
FILE* in;
int j = 1;
char a[20], c;
LinkList L;
InitList(L);
errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
while (!feof(in))//直到碰见文件结束符结束循环
{
int i = 0;
memset(a, 0, sizeof(a));
while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
a[i++] = c;
if (a[0])
InsertList(L, a);
}
sort_slist(L);
outnode(L);
fclose(in);//关闭文件
}
// Returns the number of word nodes in the list (the head node is not counted).
int sumNode(LinkList& L)
{
    int total = 0;
    for (LinkList cur = L->next; cur; cur = cur->next)
        ++total;
    return total;
}
void single_ASL1()
{
FILE* in;
int sum;
char a[20], c;
LinkList L;
InitList(L);
errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
while (!feof(in))//直到碰见文件结束符结束循环
{
int i = 0;
memset(a, 0, sizeof(a));
while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
a[i++] = c;
if (a[0])
InsertList(L, a);
}
sum = sumNode(L);
cout << "单词总个数:" << sum << endl;
fclose(in);//关闭文件
cout << "ASL = " << double(3 * (sum + 1) / 4.0) << endl;
}
// Runs the four linked-list steps back to back: count, delete, output, ASL.
void continue_to_finish1()
{
    typedef void (*Step)();
    const Step pipeline[] = {
        single_distinguish_count1,
        single_delete1,
        single_output1,
        single_ASL1
    };

    for (size_t k = 0; k < sizeof(pipeline) / sizeof(pipeline[0]); ++k)
        pipeline[k]();
}
void show_time1()
{
double star, finish;
star = (double)clock();//获取当前时间
single_distinguish_count1();
single_delete1();
single_output1();
single_ASL1();
finish = (double)clock();//获取结束时间
cout << "执行时间:" << (finish - star) << endl;//得到的是运行for语句所用的时间,时间单位了毫秒
}
BiTree.h:
#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<malloc.h>
#include<string.h>
#include <time.h>
#include<iostream>
using namespace std;
// Node of the binary search tree of words.
struct node2
{
    char data[20];       // word text, NUL-terminated (room for 19 characters)
    int count;           // how many times the word has been seen
    struct node2* left;  // subtree of words that compare smaller
    struct node2* right; // subtree of words that do not compare smaller
};
typedef struct node2 BSTNode; // a node
typedef struct node2* BSTree; // pointer to a node / handle of a (sub)tree
// Global tree roots: T holds every word read by single_distinguish_count2();
// nT holds copies of the words with count >= 5 and is rebuilt by single_delete2().
BSTree T, nT;
// Fixed-capacity stack of tree-node pointers. fprint2() uses it for the
// non-recursive in-order traversal that writes outFile.txt.
typedef struct stack
{
BSTree data[1000]; // stored node pointers
int top; // index of the top element; fprint2() starts it at -1 for "empty"
}seqstack;
// Records one occurrence of the word `a` in the search tree rooted at T.
// An existing word has its counter incremented; a new word becomes a leaf
// with count 1. Ordering follows strcmp().
// The descent is iterative so a degenerate (list-shaped) tree cannot exhaust
// the call stack, and each node is compared only once.
// Terminates the program if a new node cannot be allocated.
void insertNode(BSTree& T, char* a)
{
    BSTree* slot = &T; // link that either holds the match or receives the new leaf
    while (*slot != NULL)
    {
        const int order = strcmp(a, (*slot)->data);
        if (order == 0)
        {
            (*slot)->count++;
            return;
        }
        slot = (order < 0) ? &(*slot)->left : &(*slot)->right;
    }

    BSTree leaf = (BSTree)malloc(sizeof(BSTNode));
    if (leaf == NULL)
    {
        fprintf(stderr, "insertNode: out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy_s(leaf->data, a);
    leaf->left = NULL;
    leaf->right = NULL;
    leaf->count = 1;
    *slot = leaf;
}
// Prints the tree in order (left, node, right), i.e. in ascending strcmp()
// order of the words, one "word count" line per node.
// Each node is printed exactly once, in the same format the linked-list
// variant uses. The earlier revision printed every node a second time through
// cout with setw/setiosflags, which also needed <iomanip> that is not included.
void printTree(BSTree T)
{
    if (T == NULL)
        return;

    printTree(T->left);
    printf("%-10s %d\n", T->data, T->count);
    printTree(T->right);
}
// Appends a header line and the in-order listing of tree T (word and count)
// to outFile.txt, using an explicit stack instead of recursion.
// If the file cannot be opened a message is printed and nothing is written.
// The stack has a fixed capacity; if the tree is deeper than that, the
// traversal stops with a message instead of writing past the array.
void fprint2(BSTree T)
{
    FILE* out = NULL;
    errno_t err = fopen_s(&out, "outFile.txt", "a+");
    if (err != 0 || out == NULL)
    {
        cout << "无法打开文件outFile.txt" << endl;
        return;
    }
    fprintf(out, "排序二叉树删除个数 < 5 的单词:\n");

    seqstack S;
    S.top = -1;
    const int capacity = (int)(sizeof(S.data) / sizeof(S.data[0]));
    bool overflow = false;

    while (!overflow && (T || S.top != -1))
    {
        // Walk down the left spine, remembering every node on the way.
        while (T)
        {
            if (S.top + 1 >= capacity)
            {
                overflow = true;
                break;
            }
            S.data[++S.top] = T;
            T = T->left;
        }
        // Visit the most recently remembered node, then continue on its right.
        if (!overflow && S.top > -1)
        {
            T = S.data[S.top--];
            fprintf(out, "%s (%d)\t", T->data, T->count);
            T = T->right;
        }
    }

    if (overflow)
        cout << "二叉排序树过深,输出不完整" << endl;
    fclose(out);
}
void single_distinguish_count2()
{
FILE* in;
T = NULL;
errno_t err = fopen_s(&in, "inFile.txt", "r");//打开输入文件
char a[20], c;
while (!feof(in))//直到碰见文件结束符结束循环
{
int i = 0;
memset(a, 0, sizeof(a));
while ((c = fgetc(in)) != EOF && !(c == ',' || c == '.' || c == '!' || c == '?' || c == ' ' || c == '(' || c == ')'))
a[i++] = c;
if (a[0])
insertNode(T, a);
}
cout << "中序遍历二叉排序树" << endl;
cout << "单词 个数统计" << endl;
printTree(T);
}
// Inserts a copy of node T (word and count) as a new leaf into the search
// tree rooted at nT and returns nT.
// Words that compare smaller go left, all others (including equal words) go
// right; no duplicate check is made.
// Terminates the program if the new node cannot be allocated.
BSTree insertTree(BSTree& nT, BSTree T)
{
    BSTree* slot = &nT; // link that will receive the new leaf
    while (*slot != NULL)
    {
        if (strcmp(T->data, (*slot)->data) < 0)
            slot = &(*slot)->left;
        else
            slot = &(*slot)->right;
    }

    BSTree leaf = (BSTree)malloc(sizeof(BSTNode));
    if (leaf == NULL)
    {
        fprintf(stderr, "insertTree: out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy_s(leaf->data, T->data);
    leaf->count = T->count;
    leaf->left = NULL;
    leaf->right = NULL;
    *slot = leaf;

    return nT;
}
// Copies every node of T whose count is at least 5 into the global tree nT.
// Nodes are visited parent-first (pre-order). Visiting them in sorted
// (in-order) sequence would feed insertTree() ascending keys and build nT as
// a right-leaning chain, making the build quadratic; parent-first insertion
// lets nT follow the shape of T instead. The set of words in nT, and thus its
// in-order listing, is the same either way.
void newBSTree(BSTree T)
{
    if (T == NULL)
        return;

    if (T->count >= 5)
        insertTree(nT, T);
    newBSTree(T->left);
    newBSTree(T->right);
}
void single_delete2()
{
cout << "删除<5的二叉排序树中序遍历:" << endl;
nT = NULL;
newBSTree(T);
cout << "单词 个数统计" << endl;
printTree(nT);
}
// Single step (search tree): prints the filtered tree nT to the console and
// appends it to outFile.txt through fprint2().
// The confirmation line is printed after the write, not before it.
// NOTE(review): fprint2() returns nothing, so the confirmation cannot reflect
// whether the write actually succeeded.
void single_output2()
{
    cout << "单词 个数统计" << endl;
    printTree(nT);
    fprint2(nT);
    cout << "写入文件outFile.txt成功" << endl;
}
// Accumulates the data needed for the average search length of tree T.
//   s - receives the sum of the depths of all nodes (root has depth 1)
//   j - receives the number of nodes
//   i - depth of T's parent; pass 0 for the root
// Always returns 1. Every path now returns a value; the earlier revision could
// reach the end of the function without one and hid the unconditional
// `return(1)` behind the indentation of an `if`.
int calculateASL(BSTree T, int* s, int* j, int i)
{
    if (T == NULL)
        return 1;

    ++i;      // depth of this node
    *s += i;  // one successful search for this word costs i comparisons
    calculateASL(T->left, s, j, i);
    ++*j;
    calculateASL(T->right, s, j, i);
    return 1;
}
// Single step (search tree): prints the average search length of the global
// tree T as "depth sum / node count", followed by the resulting value.
// For an empty tree only the fraction "0 / 0" is shown, so no division by
// zero takes place.
void single_ASL2()
{
    int s = 0; // sum of node depths
    int j = 0; // number of nodes
    calculateASL(T, &s, &j, 0);

    cout << "ASL = " << s << " / " << j;
    if (j > 0)
        cout << " = " << (double)s / j;
    cout << endl;
}
// Runs the four search-tree steps back to back: count, delete, output, ASL.
void continue_to_finish2()
{
    typedef void (*Step)();
    const Step pipeline[] = {
        single_distinguish_count2,
        single_delete2,
        single_output2,
        single_ASL2
    };

    for (size_t k = 0; k < sizeof(pipeline) / sizeof(pipeline[0]); ++k)
        pipeline[k]();
}
void show_time2()
{
double star, finish;
star = (double)clock();//获取当前时间
single_distinguish_count2();
single_delete2();
single_output2();
single_ASL2();
finish = (double)clock();//获取结束时间
cout << "执行时间:" << (finish - star) << endl;//得到的是运行for语句所用的时间,时间单位了毫秒
}
UI.h:
#pragma once
#include<iostream>
#include"BiTree.h"
#include"LinkList.h"
using namespace std;
// Prints the seven-entry sub-menu shared by the list and tree screens,
// followed by the input prompt (the prompt is not terminated by a newline).
void print_choose()
{
    static const char* const entries[] = {
        "1.连续执行至完毕",
        "2.显示执行时间",
        "3.单步执行:识别并统计单词",
        "4.单步执行:删除并显示出现频率低的单词",
        "5.单步执行:输出其余单词及其频率",
        "6.单步执行:计算并输出ASL",
        "7.返回主菜单"
    };

    for (size_t k = 0; k < sizeof(entries) / sizeof(entries[0]); ++k)
        std::cout << entries[k] << std::endl;
    std::cout << "请输入你的选择:";
}
int mainmenu();// forward declaration: Link() and Tree() below call mainmenu() before it is defined
// Linked-list sub-menu: shows the menu, runs the chosen step and repeats until
// the user picks 7, then returns to the caller (the main menu).
// The menu is a loop rather than a function calling itself after every
// action, so the call stack no longer grows with each choice.
// Non-numeric input is discarded and asked for again instead of looping
// forever on a failed stream; end of input leaves the menu.
void Link()
{
    for (;;)
    {
        cout << "线性表选择菜单" << endl << endl;
        print_choose();

        int n = 0;
        for (;;)
        {
            if (cin >> n)
            {
                if (n >= 1 && n <= 7)
                    break;
            }
            else
            {
                if (cin.eof() || cin.bad())
                    return; // no more input can arrive
                // Recover the stream and drop the rest of the offending line.
                cin.clear();
                char skipped;
                while (cin.get(skipped) && skipped != '\n')
                {
                }
            }
            cout << "输入有误,请重新输入" << endl;
        }

        system("cls");
        switch (n)
        {
        case 1:
            continue_to_finish1();
            break;
        case 2:
            show_time1();
            break;
        case 3:
            single_distinguish_count1();
            break;
        case 4:
            single_delete1();
            break;
        case 5:
            single_output1();
            break;
        case 6:
            single_ASL1();
            break;
        case 7:
            return; // the caller shows the main menu again
        }
    }
}
// Search-tree sub-menu: shows the menu, runs the chosen step and repeats until
// the user picks 7, then returns to the caller (the main menu).
// The menu is a loop rather than a function calling itself after every
// action, so the call stack no longer grows with each choice.
// Non-numeric input is discarded and asked for again instead of looping
// forever on a failed stream; end of input leaves the menu.
void Tree()
{
    for (;;)
    {
        cout << "二叉排序树选择菜单" << endl << endl;
        print_choose();

        int n = 0;
        for (;;)
        {
            if (cin >> n)
            {
                if (n >= 1 && n <= 7)
                    break;
            }
            else
            {
                if (cin.eof() || cin.bad())
                    return; // no more input can arrive
                // Recover the stream and drop the rest of the offending line.
                cin.clear();
                char skipped;
                while (cin.get(skipped) && skipped != '\n')
                {
                }
            }
            cout << "输入有误,请重新输入" << endl;
        }

        system("cls");
        switch (n)
        {
        case 1:
            continue_to_finish2();
            break;
        case 2:
            show_time2();
            break;
        case 3:
            single_distinguish_count2();
            break;
        case 4:
            single_delete2();
            break;
        case 5:
            single_output2();
            break;
        case 6:
            single_ASL2();
            break;
        case 7:
            return; // the caller shows the main menu again
        }
    }
}
// Main menu: lets the user open the linked-list menu (1), the search-tree
// menu (2) or quit (3), and repeats until the program exits.
// The function never returns normally: choice 3 and end of input both end the
// program through exit(0), so no path reaches the end of this non-void
// function without a value.
// The menu is a loop rather than a function calling itself, and non-numeric
// input is discarded and asked for again instead of looping forever.
int mainmenu()
{
    for (;;)
    {
        cout << "1.线性表" << endl;
        cout << "2.二叉排序树" << endl;
        cout << "3.退出系统" << endl;
        cout << "请选择你需要的服务,输入数字(1-3):";

        int n = 0;
        for (;;)
        {
            if (cin >> n)
            {
                if (n >= 1 && n <= 3)
                    break;
            }
            else
            {
                if (cin.eof() || cin.bad())
                {
                    // No more input can arrive: leave as if 3 had been chosen.
                    cout << "退出系统" << endl;
                    exit(0);
                }
                // Recover the stream and drop the rest of the offending line.
                cin.clear();
                char skipped;
                while (cin.get(skipped) && skipped != '\n')
                {
                }
            }
            cout << "输入有误,请重新输入" << endl;
        }

        system("cls");
        switch (n)
        {
        case 1:
            Link();
            break;
        case 2:
            Tree();
            break;
        case 3:
            cout << "退出系统" << endl;
            exit(0);
        }
    }
}
LF_filter.cpp:
#include"UI.h"
// Program entry point: hands control to the main menu.
int main(void)
{
    mainmenu();
    return EXIT_SUCCESS;
}