题目要求:
- 分别读取两个C++程序文件(InFile1.cpp, InFile2.cpp),识别其中的关键字并统计频度,分别生成两个文件,保存关键字名称和对应频度(OutFile1.txt, OutFile2.txt)。
- 自行设计散列函数,分别利用开放地址法和链地址法构建C语言关键字的散列表。在扫描源程序的过程中,每遇到关键字就查找相应散列表,并累加相应关键字出现的频度。
- 根据统计的两个程序中关键字的不同频度,可以得到两个向量X1和X2,并由此计算两者的相对距离 s = |X1 - X2| / (|X1| + |X2|)。
- 利用开放地址法和链地址法两种方法实现,分别输出s和两种方法计算s所用的时间,分析比较两种方法的效率。
大二数据结构的课程设计,没有什么突出的地方。只是实现了题目要求的功能和两个选做内容。
缺点:
- 1.没有图形化
- 2.string类型没有自己定义
- 3.没有实现C++的多文件组成,导致程序看起来很复杂,很丑。
- 4.程序不够简洁,很多函数本来可以只写一次,然后多次调用,但是我却重复写了很多次。
图形化我试了试,但还没顾上继续学,就被C++的多文件组成给拦住了。C++的多文件组成也没搞好,程序一被分开,就会报错。结果两个功能都没搞好。string类型本来自己定义了一个My_string类,但是还没顾上实现;程序简洁也一样,又写了一个版本,但还没写完。
感悟
自己懂得太少了。为了做图形化,用了Visual Studio 2019,才知道什么叫专业,什么叫项目。但还是不知道,为什么代码在不同的编译器上,运行结果还不一样。在Code::Blocks上可以运行的代码,在Visual Studio 2019上就运行不了。
#include<iostream>
#include<fstream>
#include<iomanip>
#include<math.h>
#include<string>
#include<cstring>
#include<typeinfo>
#include<windows.h>
#include<ctime>
// Number of tracked keywords; also used as the hash-table size and the hash modulus.
#define MOD1 25
using namespace std;
// Key type stored in the hash tables and scanned source lines.
typedef string Type;
/**
 * Minimal singly linked stack.
 *
 * The object itself is the handle: `S` points at the top node. Each node is
 * itself a My_stack whose `num` carries the stored value and whose `next`
 * points at the node below it.
 */
template<typename T>
class My_stack
{
private:
    T num;                      // value stored in this node
    My_stack * S = nullptr;     // top node of the stack; nullptr when empty
public:
    My_stack * next = nullptr;  // node below this one; initialised so it is never indeterminate
    void push(T x);             // put x on top
    void pop();                 // drop the top element (no-op when empty)
    bool empty();               // true when nothing is stored
};
/**
 * Node of a plain singly linked list of keyword counters, used as the
 * unhashed baseline.
 *
 * An object built from a file name owns a sentinel reachable through heads();
 * the nodes behind the sentinel each carry one keyword and its count.
 * All pointer and counter members are given defaults so a default-constructed
 * node never exposes indeterminate values.
 */
class linear_table
{
private:
    linear_table * head = nullptr;   // sentinel node of the list; nullptr for plain nodes
public:
    std::string key;                 // keyword held by this node
    int num = 0;                     // occurrences counted for `key`
    // Returns the sentinel node; its `next` is the first keyword node.
    linear_table * heads()
    {
        return head;
    }
    // Creates an unlinked, empty node.
    linear_table() {}
    linear_table * next = nullptr;   // following node, nullptr at the end of the list
    // Builds the keyword list and counts keywords in the given file.
    linear_table(const char * filename);
};
// One slot of the open-addressing hash table.
// (The former leading `typedef` declared no alias and only produced a compiler warning.)
struct node
{
    Type key;      // keyword stored in the slot
    int num = 0;   // occurrences counted for `key`
};
// Bucket head / chain element of the separate-chaining hash table.
// (The former leading `typedef` declared no alias and only produced a compiler warning.)
struct nodes
{
    Type key;                // keyword stored in this element
    int num = 0;             // occurrences counted for `key`
    nodes * next = nullptr;  // next element in the same bucket, nullptr at the end
};
// Reports whether the stack currently holds no element.
template<typename T>
bool My_stack<T>::empty()
{
    return S == NULL;
}
// Places x on top of the stack by linking a freshly allocated node in front
// of the current top.
template<typename T>
void My_stack<T>::push(T x)
{
    My_stack * fresh = new My_stack;
    if(fresh == NULL)
        return;
    fresh->next = this->S;
    fresh->num = x;
    this->S = fresh;
}
// Removes the top element; does nothing when the stack is empty.
// The unlinked node is released here — previously it was only unlinked and
// therefore leaked on every pop.
template<typename T>
void My_stack<T>::pop()
{
    My_stack * top = this->S;
    if(top == NULL)
        return;
    this->S = top->next;
    delete top;   // nodes are created with `new My_stack` in push()
}
// Shared block-comment marker: found() pushes on "/*" and pops on "*/",
// so a non-empty stack means the scanner is currently inside a comment.
My_stack<int> q;
// The MOD1 keywords whose occurrences are counted.
// "auto" was previously spelled "Auto", which is not a keyword and so could
// never match real source text.
string name[MOD1] = {"void","int","char","float","default","return","throw",
                     "for","if","else","do","while","auto","short","using","virtual","template",
                     "break","catch","friend","continue","union","volatile","goto","extern"
                    };
// Reports whether c may be part of an identifier (letter, digit or underscore).
static bool found_is_ident_char(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
           (c >= '0' && c <= '9') || c == '_';
}

/**
 * Counts the whole-word occurrences of keyword `v` in one source line.
 *
 * Text after "//" is ignored, and so is text between "/" "*" and "*" "/".
 * Whether the scan is inside a block comment is carried between calls in the
 * global stack `q` (non-empty means inside a comment), so a comment may span
 * several lines.
 *
 * Fixes compared with the previous version:
 *  - no read past the end of `words` after a partial match;
 *  - a match must not be preceded by an identifier character
 *    (so "print(" no longer counts as `int`);
 *  - a match must not be followed by any identifier character
 *    (uppercase letters, digits and '_' were previously accepted).
 *
 * NOTE(review): this function mutates `q`. A caller that scans the same line
 * once per keyword therefore starts each later call with the state left by
 * the previous one.
 *
 * @param words one line of source text
 * @param v     keyword to look for
 * @return number of whole-word matches of `v` outside comments
 */
int found(Type words,string v)
{
    const size_t len = words.size();
    const size_t klen = v.size();
    int k = 0;
    size_t l = 0;
    while(l < len)
    {
        const bool has_next = (l + 1 < len);
        if(!q.empty())
        {
            // Inside a block comment: only look for its terminator.
            if(has_next && words[l] == '*' && words[l+1] == '/')
            {
                q.pop();
                l += 2;
            }
            else
                l++;
            continue;
        }
        if(has_next && words[l] == '/')
        {
            if(words[l+1] == '/')
                break;              // line comment: the rest of the line is ignored
            if(words[l+1] == '*')
            {
                q.push(1);          // block comment opens
                l += 2;
                continue;
            }
        }
        // The length check keeps compare() and the boundary reads inside the string.
        if(klen != 0 && klen <= len - l && words.compare(l, klen, v) == 0)
        {
            const bool left_ok = (l == 0) || !found_is_ident_char(words[l-1]);
            const bool right_ok = (l + klen == len) || !found_is_ident_char(words[l+klen]);
            if(left_ok && right_ok)
                k++;
        }
        l++;
    }
    return k;
}
/**
 * Hash function: sum of the key's byte values modulo MOD1.
 *
 * Bytes are read as unsigned so that a key containing non-ASCII bytes cannot
 * produce a negative sum (and thus a negative table index) on platforms where
 * `char` is signed. Results for plain ASCII keys are unchanged.
 *
 * @param key string to hash
 * @return index in the range [0, MOD1)
 */
int hash_table_num(Type key)
{
    int d = 0;
    for(size_t i = 0; i < key.length(); i++)
        d += static_cast<unsigned char>(key[i]);
    return d % MOD1;
}
/**
 * Looks `key` up in an open-addressing table using linear probing.
 *
 * @param key   keyword to locate
 * @param table table of MOD1 slots
 * @return slot index holding `key`, or -1 (after printing a message) when the
 *         probe wraps around to its starting slot without finding it
 */
int hash_found(Type key,node * table)
{
    const int home = hash_table_num(key);
    int slot = home;
    while(table[slot].key != key)
    {
        slot = (slot + 1) % MOD1;
        if(slot == home)
        {
            cout<<"found error!!!"<<endl;
            return -1;
        }
    }
    return slot;
}
/**
 * Looks `key` up in a separate-chaining table.
 *
 * Walks the bucket selected by hash_table_num(), starting with the bucket
 * head stored in the array itself. The previous version dereferenced the
 * first chain pointer without checking it, which crashed when the head did
 * not match and the bucket had no chain.
 *
 * @param key   keyword to locate
 * @param table array of MOD1 bucket heads
 * @return the element holding `key`, or NULL (after printing a message) when
 *         the bucket does not contain it
 */
nodes * hash_founds(Type key,nodes * table)
{
    for(nodes * x = &table[hash_table_num(key)]; x != NULL; x = x->next)
    {
        if(x->key == key)
            return x;
    }
    cout<<"found error!!!"<<endl;
    return NULL;
}
/**
 * Allocates the open-addressing table and inserts every keyword of `name`.
 *
 * Free slots are marked with the key "#". Collisions are resolved by linear
 * probing; if a probe returns to its starting slot the table is reported as
 * full and construction stops.
 *
 * @param table receives the newly allocated array of MOD1 slots
 */
void hash_create(node * &table)
{
    table = new node[MOD1];
    for(node * slot = table; slot != table + MOD1; ++slot)
    {
        slot->key = "#";
        slot->num = 0;
    }
    for(int i = 0; i < MOD1; ++i)
    {
        const int home = hash_table_num(name[i]);
        int pos = home;
        while(table[pos].key != "#")
        {
            pos = (pos + 1) % MOD1;
            if(pos == home)
            {
                cout<<"hash table have fill"<<endl;
                return;
            }
        }
        table[pos].key = name[i];
    }
}
/**
 * Allocates the separate-chaining table and inserts every keyword of `name`.
 *
 * The array elements are the bucket heads; a head whose key is "#" is free.
 * A keyword that hashes to an occupied bucket is appended at the tail of that
 * bucket's chain, so existing elements keep their positions (the original
 * author chose tail insertion with future multithreading in mind).
 *
 * @param table receives the newly allocated array of MOD1 bucket heads
 */
void hash_creates(nodes * &table)
{
    table = new nodes[MOD1];
    for(nodes * head = table; head != table + MOD1; ++head)
    {
        head->key = "#";
        head->num = 0;
        head->next = NULL;
    }
    for(int i = 0; i < MOD1; ++i)
    {
        nodes * bucket = &table[hash_table_num(name[i])];
        if(bucket->key == "#")
        {
            bucket->key = name[i];
            continue;
        }
        nodes * extra = new nodes;
        extra->next = NULL;
        extra->num = 0;
        nodes * tail = bucket;
        while(tail->next != NULL)
            tail = tail->next;
        tail->next = extra;
        extra->key = name[i];
    }
}
/**
 * Scans a source file line by line and adds the keyword counts to `table`.
 *
 * Fixes compared with the previous version:
 *  - the file name is passed on directly instead of being copied unchecked
 *    into a 100-byte buffer;
 *  - the file is opened read-only (ifstream), so read-only files can be scanned;
 *  - a failed table lookup (-1 / NULL) is skipped instead of being dereferenced;
 *  - every keyword scan of a line starts from the same block-comment state.
 *    found() updates the global `q`, so without the reset the 2nd..25th scans
 *    of a line that opens or closes a comment started from the wrong state.
 *
 * @param filename path of the file to scan
 * @param table    open-addressing (node *) or chaining (nodes *) table
 * @return true when the file could be opened, false otherwise
 */
template <typename T>
bool read_file(string filename,T * table)
{
    cout<<"filename : "<<filename<<endl<<endl;
    ifstream file(filename.c_str());
    if(!file)
    {
        cout<<"Error!Can't open file!"<<endl;
        return false;
    }
    // Start outside any block comment.
    while(!q.empty())
        q.pop();
    Type words;
    while(getline(file,words))
    {
        const bool in_comment = !q.empty();   // state at the start of this line
        for(int i = 0; i<MOD1; i++)
        {
            // Restore the start-of-line state changed by the previous scan.
            if(in_comment && q.empty())
                q.push(1);
            else if(!in_comment && !q.empty())
                q.pop();
            const int k = found(words,name[i]);
            if(k == 0)
                continue;
            // Only one branch is live per instantiation; the cast keeps the
            // other branch compilable.
            if(typeid(table) == typeid(node *))
            {
                const int t = hash_found(name[i],(node *)table);
                if(t >= 0)
                    table[t].num += k;
            }
            else if(typeid(table) == typeid(nodes *))
            {
                nodes * hit = hash_founds(name[i],(nodes *)table);
                if(hit != NULL)
                    hit->num += k;
            }
        }
    }
    file.close();
    return true;
}
/**
 * identify_and_statistics: builds an open-addressing table, counts the
 * keywords of the selected file into it and prints the result as two framed
 * rows (keywords above, counts below).
 *
 * @param num 1 selects "mains.cpp", 2 selects the second project's file;
 *            any other value leaves the file name empty
 * @return the table; counts stay zero when the file cannot be read
 */
node * IAS(int num)
{
    node * table;
    string filename;
    hash_create(table);
    switch(num)
    {
    case 1:
        filename = "mains.cpp";
        break;
    case 2:
        filename = "../课程设计3.0/mains.cpp";
        break;
    }
    if(!read_file(filename,table))
        return table;

    // Prints one horizontal separator line.
    auto rule = []()
    {
        for(int seg = 0; seg < 4; seg++)
            cout<<"---------------------------------------------";
        cout<<endl;
    };
    // Column width of a slot: the larger of the key length and the number of
    // decimal digits of the count (zero digits for a count of 0).
    auto column_width = [](const node & cell) -> int
    {
        int digits = 0;
        for(int rest = cell.num; rest; rest = rest / 10)
            digits++;
        int width = (digits > cell.key.length()) ? digits : cell.key.length();
        return width;
    };

    rule();
    for(int i = 0; i < MOD1; i++)
        cout<<"|"<<setw(column_width(table[i]))<<setfill(' ')<<table[i].key<<"|";
    cout<<endl;
    rule();
    for(int i = 0; i < MOD1; i++)
        cout<<"|"<<setw(column_width(table[i]))<<setfill(' ')<<table[i].num<<"|";
    cout<<endl;
    rule();
    return table;
}
/**
 * Chaining counterpart of IAS: builds a separate-chaining table, counts the
 * keywords of the selected file into it and prints every non-empty bucket on
 * its own line as |key|count| pairs.
 *
 * @param num 1 selects "mains.cpp", 2 selects the second project's file;
 *            any other value leaves the file name empty
 * @return the table; counts stay zero when the file cannot be read
 */
nodes * IAS_s(int num)
{
    nodes * table;
    string filename;
    hash_creates(table);
    switch(num)
    {
    case 1:
        filename = "mains.cpp";
        break;
    case 2:
        filename = "../课程设计3.0/mains.cpp";
        break;
    }
    if(!read_file(filename,table))
        return table;

    for(int bucket = 0; bucket < MOD1; bucket++)
    {
        if(table[bucket].key == "#")
            continue;   // unused bucket head
        for(nodes * cell = &table[bucket]; cell != NULL; cell = cell->next)
            cout<<"|"<<setw(5)<<setfill(' ')<<cell->key<<"|"<<setw(5)<<setfill(' ')<<cell->num<<"|";
        cout<<endl;
    }
    return table;
}
/**
 * Computes the relative distance of the two keyword-frequency vectors:
 *     s = |X1 - X2| / (|X1| + |X2|)
 * where the vector components are the per-keyword counts of the two tables.
 *
 * Fixes compared with the previous version:
 *  - every path returns a value (an unsupported table type fell off the end);
 *  - two all-zero vectors yield 0 instead of the 0/0 result;
 *  - a keyword that cannot be located is skipped instead of indexing with -1
 *    or dereferencing NULL.
 *
 * @param table1 counts of the first program (node * or nodes *)
 * @param table2 counts of the second program, same kind as table1
 * @return the relative distance, or -1 when a table is missing or the table
 *         type is not supported
 */
template <typename T>
float Relative_Position(T * table1, T * table2)
{
    if(table1 == NULL || table2 == NULL)
    {
        cout<<"Error ! Don't have enough data."<<endl;
        return -1;
    }
    float sum1 = 0,sum2 = 0,sum3 = 0;
    if(typeid(table1) == typeid(node *))
    {
        for(int i = 0; i < MOD1; i++)
        {
            // Both tables are filled in the same order, so a keyword sits in
            // the same slot of each.
            const int t = hash_found(name[i],(node *)table1);
            if(t < 0)
                continue;
            const double a = table1[t].num;
            const double b = table2[t].num;
            sum1 = sum1 + (a - b) * (a - b);
            sum2 = sum2 + a * a;
            sum3 = sum3 + b * b;
        }
    }
    else if(typeid(table1) == typeid(nodes *))
    {
        for(int i = 0; i < MOD1; i++)
        {
            nodes * p1 = hash_founds(name[i],(nodes *)table1);
            nodes * p2 = hash_founds(name[i],(nodes *)table2);
            if(p1 == NULL || p2 == NULL)
                continue;
            const double a = p1->num;
            const double b = p2->num;
            sum1 = sum1 + (a - b) * (a - b);
            sum2 = sum2 + a * a;
            sum3 = sum3 + b * b;
        }
    }
    else
        return -1;   // unsupported table type
    if(sum2 == 0 && sum3 == 0)
        return 0;    // both vectors are zero: identical, and avoids 0/0
    return sqrt(sum1)/(sqrt(sum2)+sqrt(sum3));
}
/**
 * Builds the keyword list, counts the keywords of `filename` into it by
 * linear search and prints the result as two framed rows (keywords above,
 * counts below).
 *
 * Fixes compared with the previous version:
 *  - the file is opened read-only (ifstream), so read-only files can be scanned;
 *  - every keyword scan of a line starts from the same block-comment state
 *    (found() updates the global `q`);
 *  - the list search checks for the end of the list before dereferencing;
 *  - the unused, shadowed width variable is gone and the nested print loop is
 *    written as two plain passes producing the same output.
 *
 * @param filename path of the file to scan
 */
linear_table::linear_table(const char * filename)
{
    head = new linear_table;
    head->next = NULL;
    // Head insertion: the list ends up in reverse order of `name`.
    for(int i = 0; i < MOD1; i++)
    {
        linear_table * p = new linear_table;
        p->key = name[i];
        p->num = 0;
        p->next = head->next;
        head->next = p;
    }
    ifstream file(filename);
    if(!file)
    {
        cout<<"Error!Can't open file!"<<endl;
        return;
    }
    // Start outside any block comment.
    while(!q.empty())
        q.pop();
    string words;
    while(getline(file,words))
    {
        const bool in_comment = !q.empty();   // state at the start of this line
        for(int i = 0; i<MOD1; i++)
        {
            // Restore the start-of-line state changed by the previous scan.
            if(in_comment && q.empty())
                q.push(1);
            else if(!in_comment && !q.empty())
                q.pop();
            const int k = found(words,name[i]);
            // The search runs even when k is 0: this list is the deliberately
            // slow baseline of the timing comparison.
            linear_table * p = head->next;
            while(p != NULL && p->key != name[i])
                p = p->next;
            if(p == NULL)
            {
                cout<<"Error! Can't found!"<<endl;
                return;
            }
            p->num += k;
        }
    }
    for(int i = 0; i<4; i++)
        cout<<"---------------------------------------------";
    cout<<endl;
    // Row of keywords.
    for(linear_table * p = head->next; p != NULL; p = p->next)
    {
        int x = p->num,y = 0;
        while(x)
        {
            x = x / 10;
            y++;
        }
        // Column width: the larger of digit count and key length.
        int max_len = (y > p->key.length()) ? y : p->key.length();
        cout<<"|"<<setw(max_len)<<setfill(' ')<<p->key<<"|";
    }
    cout<<endl;
    for(int i = 0; i<4; i++)
        cout<<"---------------------------------------------";
    cout<<endl;
    // Row of counts, using the same column widths.
    for(linear_table * p = head->next; p != NULL; p = p->next)
    {
        int x = p->num,y = 0;
        while(x)
        {
            x = x / 10;
            y++;
        }
        int max_len = (y > p->key.length()) ? y : p->key.length();
        cout<<"|"<<setw(max_len)<<setfill(' ')<<p->num<<"|";
    }
    cout<<endl;
    for(int i = 0; i<4; i++)
        cout<<"---------------------------------------------";
    cout<<endl;
    file.close();
}
void Run_Time(int num)
{
if(num == 1)
{
double startTime = clock();
node * table1 = IAS(1), * table2 = IAS(2);
Relative_Position(table1,table2);
double endTime = clock();
cout << "The run time is: " <<(double)(endTime - startTime) / CLOCKS_PER_SEC << "s" << endl;
}
else
{
double startTime = clock();
nodes * table1 = IAS_s(1), * table2 = IAS_s(2);
Relative_Position(table1,table2);
double endTime = clock();
cout << "The run time is: " <<(double)(endTime - startTime) / CLOCKS_PER_SEC << "s" << endl;
}
}
void low_function()
{
double startTime = clock();
linear_table table1("mains.cpp");
linear_table table2("../课程设计3.0/mains.cpp");
float sum1 = 0,sum2 = 0,sum3 = 0;
for(int i =0; i<MOD1; i++)
{
linear_table * p1 = table1.heads()->next;
linear_table * p2 = table2.heads()->next;
while(p1->key != name[i])
p1 = p1->next;
while(p2->key != name[i])
p2 = p2->next;
sum1 = sum1 + pow(p1->num - p2->num,2);
sum2 = sum2 + pow(p1->num,2);
sum3 = sum3 + pow(p2->num,2);
}
sqrt(sum1)/(sqrt(sum2)+sqrt(sum3)); //../课程设计1.0/main.cpp
double endTime = clock();
cout << "The run time is: " <<(double)(endTime - startTime) / CLOCKS_PER_SEC << "s" << endl;
system("pause");
}
/**
 * Interactive sub-menu for one hashing method.
 *
 * Fixes compared with the previous version:
 *  - returns when reading the choice fails (end of input); before, the loop
 *    kept redrawing the menu forever;
 *  - the relative position is computed before its label is printed, so an
 *    error message from the computation no longer lands in the middle of the
 *    result line.
 *
 * @param num 1 for open addressing, 2 for separate chaining
 */
void child_menu(int num)
{
    node * table1 = NULL, * table2 = NULL;     // open-addressing results
    nodes * table3 = NULL, * table4 = NULL;    // chaining results
    while(1)
    {
        cout<<"(1)识别并统计关键字频率"<<endl;
        cout<<"(2)计算相对位置"<<endl;
        string x = num == 1 ? "(3)开放地址法执行时间" : "(3)链地址法执行时间";
        cout<<x<<endl;
        cout<<"(4)返回主界面"<<endl<<endl;
        cout<<"请选择您需要的服务"<<endl;
        string option;
        if(!(cin>>option))
            return;   // input closed or failed: leave instead of looping forever
        switch(option[0])
        {
        case '1':
            if(num == 1)
            {
                table1 = IAS(1);
                table2 = IAS(2);
            }
            else
            {
                table3 = IAS_s(1);
                table4 = IAS_s(2);
            }
            break;
        case '2':
            if(num == 1)
            {
                const float s = Relative_Position(table1,table2);
                cout<<"relative position is : "<<s<<endl;
            }
            else if(num == 2)
            {
                const float s = Relative_Position(table3,table4);
                cout<<"relative position is : "<<s<<endl;
            }
            break;
        case '3':
            Run_Time(num);
            break;
        case '4':
            return;
        }
        system("pause");
        system("cls");
    }
}
/**
 * Program entry: shows the banner, then runs the top-level menu until the
 * user chooses to quit.
 *
 * Fix compared with the previous version: the program exits when reading the
 * choice fails (end of input); before, the menu loop spun forever.
 */
int main()
{
    system("COLOR 73");
    cout<<"\t\t\t\t#############################################################"<<endl<<endl<<endl;
    cout<<"\t\t\t\t\t\t"<<"基于散列表的程序相近度检测系统"<<endl<<endl<<endl<<endl;
    cout<<"\t\t\t\t#############################################################"<<endl<<endl;
    // Short start-up animation.
    for(int i =0; i<40; i++)
    {
        cout<<"=*=";
        Sleep(80);
    }
    cout<<endl<<endl;
    while(1)
    {
        cout<<"(1)开放地址法"<<endl;
        cout<<"(2)链地址法"<<endl;
        cout<<"(3)线性表法"<<endl;
        cout<<"(4)退出系统"<<endl<<endl;
        cout<<"请选择您需要的服务"<<endl;
        string option;
        if(!(cin>>option))
            return 0;   // input closed or failed: exit instead of looping forever
        switch(option[0])
        {
        case '1':
            system("cls");
            child_menu(1);
            break;
        case '2':
            system("cls");
            child_menu(2);
            break;
        case '3':
            low_function();
            break;
        case '4':
            return 0;
        }
        system("cls");
    }
}