实验目的:
在代码实践中熟悉散列查找流程,熟悉C语言文件操作,提高写代码能力。
预期效果:
读取指定文件,第一行输出有效单词数量,接下来每一行按“单词: <单词>, 词频: <次数>”的格式输出所有统计到的单词及其词频。
实验思路:
使用移位+取余法构造哈希函数,使用链地址法(拉链法)解决地址冲突,采用散列查找思想。
实验代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Word buffers in this file are allocated as MAXWORDLENGTH + 1 chars
 * (the extra char leaves room for the terminating NUL). */
#define MAXWORDLENGTH 12
/* Number of buckets (chain heads) in the hash table. */
#define HASHTABLESIZE 1000
/* Path of the text file that main() opens for reading. */
#define DOCNAME "test.txt"
/* One entry in a bucket's singly linked chain. */
struct HNode {
    char *word;          /* NUL-terminated text of the word */
    int count;           /* frequency counter for this word */
    struct HNode *next;  /* following node in the chain, NULL at the end */
};

/* Pointer-to-node alias used by the rest of the file. */
typedef struct HNode *HashNode;
/* The hash table type: a fixed array of HASHTABLESIZE chain heads,
 * one linked list of nodes per bucket. */
typedef HashNode HashTable[HASHTABLESIZE];
/*
 * Search one chain for a word.
 *
 * hN:   first node of the chain (may be NULL for an empty chain).
 * word: NUL-terminated string to look for (compared with strcmp).
 *
 * Returns the first node whose word equals `word`, or NULL when the
 * chain holds no such node.
 */
HashNode findWord (HashNode hN, char *word) {
    /* Advance until we either run off the chain or hit a match;
     * in both cases hN is exactly the value to return. */
    while (hN != NULL && strcmp(hN->word, word) != 0) {
        hN = hN->next;
    }
    return hN;
}
/*
 * Find the last node of a chain.
 *
 * hN: first node of the chain (may be NULL).
 *
 * Returns the node whose `next` is NULL, or NULL if the chain is empty.
 * The loop must stop ON the last node rather than one past it; walking
 * until hN itself is NULL would always return NULL and make
 * `findTail(x)->next = ...` a null-pointer dereference.
 */
HashNode findTail (HashNode hN) {
    if (hN == NULL) {
        return NULL;
    }
    while (hN->next != NULL) {
        hN = hN->next;
    }
    return hN;
}
/*
 * Read the next word from a file.
 *
 * A word is a maximal run of ASCII letters (a-z, A-Z). Any non-letter
 * characters before the word are skipped, and the single non-letter that
 * terminates the word is consumed.
 *
 * fp: stream opened for reading.
 *
 * Returns a newly malloc'd, NUL-terminated string that the caller owns:
 *   - the word, truncated to MAXWORDLENGTH characters if it is longer
 *     (the excess letters are still consumed so the next call starts at
 *     the following word);
 *   - an empty string if end-of-file (or a read error) is reached before
 *     any letter is found;
 *   - NULL if memory allocation fails.
 */
char *readWord(FILE *fp) {
    char *ret = malloc((MAXWORDLENGTH + 1) * sizeof *ret);
    if (ret == NULL) {
        return NULL;
    }

    int length = 0;
    int c; /* int, not char: EOF must stay distinguishable from every byte */

    while ((c = fgetc(fp)) != EOF) {
        int isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

        if (!isLetter) {
            if (length > 0) {
                break;      /* delimiter after the word: word is complete */
            }
            continue;       /* still skipping leading separators */
        }

        /* Never write past the buffer; over-long words are truncated. */
        if (length < MAXWORDLENGTH) {
            ret[length++] = (char)c;
        }
    }

    ret[length] = '\0';
    return ret;
}
/*
 * Record one occurrence of a word in the hash table.
 *
 * The bucket index is computed with a shift-and-add hash over the word's
 * characters, reduced modulo HASHTABLESIZE. Collisions are resolved by
 * chaining: new words are appended to the end of the bucket's list.
 *
 * hT:   the hash table (every bucket must be initialised, NULL = empty).
 * word: heap-allocated, NUL-terminated string. This function takes
 *       ownership of it: the pointer is either stored in a new node
 *       (first occurrence) or freed (word already present, or empty).
 *       The caller must not use or free `word` afterwards.
 *       A NULL `word` is ignored.
 *
 * Terminates the program if a node cannot be allocated.
 */
void hash(HashTable hT, char *word) {
    if (word == NULL) {
        return;
    }
    if (word[0] == '\0') {
        /* An empty string is not a word; do not pollute the table. */
        free(word);
        return;
    }

    /* Hash the string in place; no scratch copy is needed. */
    unsigned int h = 0;
    for (const char *p = word; *p != '\0'; p++) {
        h = (h << 5) + (unsigned int)*p;
    }
    h %= HASHTABLESIZE;

    /* Walk the chain once. `link` always points at the pointer that would
     * have to be updated to append a node, so when the loop ends without
     * a match it addresses the tail's `next` (or the empty bucket slot). */
    HashNode *link = &hT[h];
    while (*link != NULL) {
        if (strcmp((*link)->word, word) == 0) {
            (*link)->count++;
            free(word);     /* duplicate text is no longer needed */
            return;
        }
        link = &(*link)->next;
    }

    /* First occurrence: allocate the node only now that it is needed. */
    HashNode node = malloc(sizeof *node);
    if (node == NULL) {
        fputs("hash: out of memory\n", stderr);
        free(word);
        exit(EXIT_FAILURE);
    }
    node->word = word;
    node->count = 1;
    node->next = NULL;
    *link = node;
}
/*
 * Print every node of one chain, one line per node, showing the word
 * and its count. An empty chain (NULL) prints nothing.
 */
void printHashChain(HashNode hN) {
    HashNode node;

    for (node = hN; node != NULL; node = node->next) {
        printf("单词: %s, 词频: %d\n", node->word, node->count);
    }
}
/* 主程序 */
int main() {
FILE *fp;
char *word;
int wordcount;
HashTable hashTable;
for (int i=0; i<HASHTABLESIZE; i++) {
hashTable[i] = NULL;
}
if ((fp=fopen(DOCNAME,"r"))==NULL)
puts("打开文件时发生错误");
else {
while (fgetc(fp)!=EOF) {
fseek(fp,-1,SEEK_CUR);
wordcount++;
word = readWord(fp);
hash(hashTable,word);
}
}
fclose(fp);
printf("共计%d个有效单词\n",wordcount);
for (int i=0; i<HASHTABLESIZE; i++) {
if (hashTable[i]!=NULL) {
printHashChain(hashTable[i]);
}
}
getchar();
return 0;
}
测试结果: