任务环境: C++ (Code::Blocks)
任务内容:
对于两个 C++程序,设计并实现两种不同的基于哈希表的检测算法,计算两个程序的相近
度,并分析比较两种算法的效率。
任务要求:
- 分别读取两个 C++ 程序文件(InFile1.cpp, InFile2.cpp),识别其中的关键字并统计频度,
  分别生成两个文件,保存关键字名称和对应频度(OutFile1.txt, OutFile2.txt)。
- 自行设计哈希函数,利用开放地址法或链地址法(选择一个)构建 C++ 语言关键字的
  哈希表。在扫描源程序的过程中,每遇到关键字就查找相应哈希表,并累加相应关键字出
  现的频度。
- 根据统计的两个程序中关键字不同频度,可以得到两个向量。
demo
Hash1
#include <math.h>
#include <string.h>
#include "stdio.h"
#include "stdlib.h"
#include "string.h"

#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
// N is not referenced in the visible part of this file; presumably the
// capacity of the character buffers handed to deal_data1()/deal_data2()
// -- TODO confirm against main().
#define N 100000
// Number of bucket heads stored in HashTable::node.
#define HASHSIZE 35
// Number of leading counter slots compared by possibality().
#define Size 15

using namespace std;

// Keyword counters for the first and the second input program;
// calcunum_c() selects one of them and judege_c() increments its slots.
int hash1[60];
int hash2[60];

// Shorthand for unsigned int.
typedef unsigned int uint;
// One entry of a HashTable bucket chain.
// The node only stores pointers: it does not copy the key text or the value.
struct Node {
    const char* key;    // keyword text (compared with strcmp by lookup)
    const int*  value;  // points at the integer associated with the key
    Node*       next;   // next entry in the same bucket, or null at the end
};
// Hash table with separate chaining: every bucket is the head of a singly
// linked list of Node entries, and new entries are pushed at the head.
class HashTable {
public:
    // Leaves every bucket empty.
    HashTable();
    ~HashTable();

    // Bucket number for `key`, derived from its first character only.
    int hash(const char* key);

    // Entry whose key equals `ch` (strcmp), or null when there is none.
    Node* lookup(const char* ch);

    // Inserts `key` or re-points an existing entry at `value`;
    // returns false when a new node cannot be allocated.
    bool install(const char* key, const int* value);

    // Declared only: no definition appears in the visible part of the file.
    const char* get(const char* key);

    // Prints the bucket layout to standard output.
    void display();

private:
    Node* node[HASHSIZE];  // bucket heads
};
// Keyword table shared by the whole program (queried by judege_c).
// Allocated during static initialisation; nothing in the visible part of
// the file deletes it.
HashTable* ht = new HashTable;
// Creates an empty table: value-initialising the pointer array sets every
// bucket head to null.
HashTable::HashTable() : node() {
}
// Releases every chain node and then prints the farewell marker.
//
// Nodes are allocated with malloc() in install(), so they are returned with
// free(). The key and value pointers are not freed: nodes only borrow them.
// The class declares no copy operations, so a HashTable must not be copied
// by value (both copies would free the same nodes).
HashTable::~HashTable(){
    for (int slot = 0; slot < HASHSIZE; ++slot) {
        Node* entry = node[slot];
        while (entry) {
            Node* following = entry->next;  // read before the node is released
            free(entry);
            entry = following;
        }
        node[slot] = NULL;
    }
    std::cout << "bye";
}
// Maps a key to a bucket number using only its first character:
// the character code minus 97 (the ASCII code of 'a'), so keys starting
// with 'a'..'z' yield 0..25. Any other first character, including the
// terminator of an empty string, yields a value outside that range.
// `key` must not be null.
int HashTable::hash(const char* key){
    const int firstChar = *key;
    return firstChar - 97;
}
// Finds the entry whose key is exactly `ch`.
//
// Returns the matching node, or NULL when the key is absent. NULL is also
// returned for a null key and for keys whose hash falls outside the bucket
// array (for example keys starting with an uppercase letter, a digit or a
// non-ASCII byte); previously such keys indexed `node` out of bounds.
Node* HashTable::lookup(const char* ch){
    if (!ch) {
        return NULL;
    }

    const int bucketCount = static_cast<int>(sizeof(node) / sizeof(node[0]));
    const int slot = hash(ch);
    if (slot < 0 || slot >= bucketCount) {
        return NULL;  // no bucket exists for this key
    }

    for (Node* entry = node[slot]; entry; entry = entry->next) {
        if (strcmp(ch, entry->key) == 0) {
            return entry;
        }
    }
    return NULL;
}
bool HashTable::install(const char* key,const int* value){
uint index;
Node *np;
if(!(np=lookup(key))){
index = hash(key);
np = (Node*)malloc(sizeof(Node));
if(!np) return false;
np->key=key;
np->next = node[index];
node[index] = np;
}
np->value=value;
return true;
}
void HashTable::display(){
Node* temp;
for (int i = 0; i < HASHSIZE; ++i)
{
if(!node[i]){
printf("%d\n",i);
}
else
{
printf("");
for (temp=node[i]; temp; temp=temp->next)
{
cout<<i<<setw(50)<<temp->key<<endl;
//printf("%d %s",i,temp->key);
//cout<<i<<setw(10)<<temp->key<<setw(5)<<*(temp->value)<<endl;
// printf("\n");
}
}
}
}
static void judege_c(int hash[],const char* ch) {
Node *np;
int x;
x=ht->hash(ch);
if(x>=0&&x<=25)
{
np=ht->lookup(ch);
//!strcmp(ch,np->key)
if(np!=NULL)
if(!strcmp(ch,np->key))
{
hash[*(np->value)]++;
//cout<<np->key<<" "<<endl;
}
// if(x==8&&ht->lookup(ch))
}
}
/*
uint HashTable::hash(const char* key){
// uint hash=0;
// for (; *key; ++key)
// {
// hash=hash*33+*key;
// }
return ((*key)-97);
}
*/
/*Node* HashTable::lookup(const char* key){
Node *np;
uint index;
index = hash(key);
for(np=node[index];np;np=np->next){
if(!strcmp(key,np->key))
return np;
}
return null;
}*/
static int deal_data1(char data[]) {
char ch;
char filename[50];
cout<<"the first file:"<<endl;
cin>>filename;
ifstream infile(filename,ios::in);
ofstream outfile("dealt_data1.txt",ios::out);
if(!infile) {
cout<<"Open first file error!"<<endl;
exit(0);
}
if(!outfile) {
cout<<"Open dealt_data1 error!"<<endl;
exit(0);
}
while(infile.get(ch)) {
if(ch=='('||ch==')'||ch=='{'||ch=='}'||ch=='['||ch==']'||ch==','||ch=='<'||ch=='>'||ch==';')
ch=' ';
outfile.put(ch);
}
infile.close();
outfile.close();
ifstream infile1("dealt_data1.txt",ios::in);
if(!infile1) {
cout<<"open dealt_data1 error!"<<endl;
}
int cnt=0;
while(infile1.get(ch)) {
data[cnt++]=ch;
}
infile1.close();
return cnt;
}
static int deal_data2(char data[]) {
char ch;
char filename[50];
cout<<"the second file:"<<endl;
cin>>filename;
ifstream infile(filename,ios::in);
ofstream outfile("dealt_data2.txt",ios::out);
if(!infile) {
cout<<"Open second file error!"<<endl;
exit(0);
}
if(!outfile) {
cout<<"Open dealt_data2 error!"<<endl;
exit(0);
}
while(infile.get(ch)) {
if(ch=='('||ch==')'||ch=='{'||ch=='}'||ch=='['||ch==']'||ch==','||ch=='<'||ch=='>'||ch==';')
ch=' ';
outfile.put(ch);
}
infile.close();
outfile.close();
ifstream infile2("dealt_data2.txt",ios::in);
if(!infile2) {
cout<<"open dealt_data2 error!"<<endl;
}
int cnt=0;
while(infile2.get(ch)) {
data[cnt++]=ch;
}
infile2.close();
return cnt;
}
// True for the characters that separate tokens in the normalised text.
// '\r' is included so that CRLF line endings do not stick to the last
// token of a line.
static bool is_token_separator(char c) {
    return c == ' ' || c == '\n' || c == '\t' || c == '\r';
}

// Splits data[0..n) into separator-delimited tokens and counts each one
// with judege_c().
//
// data  character buffer; it does not need to end with a separator or a NUL
// n     number of valid characters in `data`
// num   1 counts into hash1, any other value counts into hash2
//
// Fixes over the previous version: the token scan stops at `n` instead of
// running past the end of the data, and tokens that do not fit the local
// buffer are skipped instead of overflowing it (a token that long cannot be
// a keyword).
static void calcunum_c(const char data[],const int n,const int num) {
    char token[100];
    const int maxLen = static_cast<int>(sizeof(token)) - 1;
    int* const counts = (num == 1) ? hash1 : hash2;

    int i = 0;
    while (i < n) {
        if (is_token_separator(data[i])) {
            ++i;
            continue;
        }

        int len = 0;
        bool tooLong = false;
        while (i < n && !is_token_separator(data[i])) {
            if (len < maxLen) {
                token[len++] = data[i];
            } else {
                tooLong = true;
            }
            ++i;
        }
        if (tooLong) {
            continue;
        }

        token[len] = '\0';
        judege_c(counts, token);
    }
}
// Euclidean distance between the first `Size` counters of the two tables:
// sqrt(sum over i in [0, Size) of (hash1[i] - hash2[i])^2).
// A result of 0 means the compared counters are identical.
//
// NOTE(review): slots 0..Size-1 are compared, while the value table in
// main() appears to start at 1; if it runs 1..Size, slot `Size` is never
// compared -- confirm against the full definition of main().
static double possibality(int hash1[],int hash2[])
{
    double squaredTotal = 0;
    for (int slot = 0; slot < Size; ++slot) {
        const int gap = hash1[slot] - hash2[slot];
        squaredTotal += gap * gap;
    }
    return sqrt(squaredTotal);
}
int main(void)
{
const char* key[]={
"void","int","for","char","if","else","while","return",
"double","float","static","const","do","sizeof","struct"
};
const int value[]={
1,2,3,4