统计单词频率
主要思想:使用二叉查找树(二叉排序树)这一数据结构来实现
这个程序参考《C程序设计语言》,其基本思路是:每个单词对应一个节点,将新读入的单词与当前节点中的单词进行比较。如果两者相同,当前节点的 count 就加 1;如果新单词小于当前节点的单词,就递归调用 addtree 函数,将它插入到当前节点的左子树;否则,就插入到当前节点的右子树。这是一种比较容易想到的方法,只要注意细节,也不难实现。下面的代码是《C程序设计语言》里的程序,读者可以比较自己写的代码和大师写的有什么不同。注意书中的程序同样没有处理出错的情况,如内存不足。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Word-frequency counter.
 *
 * Reads whitespace-separated words from "source.txt", stores every distinct
 * word in a binary search tree ordered by strcmp(), and prints each word
 * with its number of occurrences in ascending strcmp() order.
 */

/*
 * Maximum number of characters kept per word. Must be a plain decimal
 * literal because it is also pasted into the fscanf() format as the field
 * width. Longer input tokens are read in successive pieces of at most
 * MAXWORD characters, each counted as its own word.
 */
#define MAXWORD 100

/* Two-step stringification so MAXWORD is expanded before being quoted. */
#define STR_(x) #x
#define STR(x) STR_(x)

struct tnode {              /* the tree node */
    char *word;             /* points to a heap-allocated copy of the text */
    int count;              /* number of occurrences */
    struct tnode *left;     /* left child: words comparing lower */
    struct tnode *right;    /* right child: words comparing higher */
};

struct tnode *addtree(struct tnode *, const char *);
void treeprint(struct tnode *);
/*
 * getword is only declared, not defined or used in this file; its
 * definition can be found in "The C Programming Language".
 */
int getword(char *, int);
struct tnode *talloc(void);
char *strdup(const char *);

static void treefree(struct tnode *p);
static void out_of_memory(void);

int main(int argc, char const *argv[])
{
    struct tnode *root = NULL;
    char word[MAXWORD + 1];     /* +1 for the terminating '\0' */
    FILE *fp;
    int status = 0;

    (void)argc;
    (void)argv;

    fp = fopen("source.txt", "r");
    if (fp == NULL) {
        fprintf(stderr, "文件打开错误");
        return 1;
    }

    /* The field width bounds every read so `word` can never overflow. */
    while (fscanf(fp, "%" STR(MAXWORD) "s", word) == 1) {
        root = addtree(root, word);
    }

    /* fscanf() stops on both end-of-file and read errors; tell them apart. */
    if (ferror(fp)) {
        fprintf(stderr, "error while reading source.txt\n");
        status = 1;
    }
    fclose(fp);

    treeprint(root);
    treefree(root);
    return status;
}

/*
 * addtree: add word w to the tree rooted at p (p may be NULL for an empty
 * tree) and return the root of the resulting tree.
 *
 * If w is already present its count is incremented; otherwise a new node
 * holding a private copy of w is inserted. Words comparing lower than a
 * node go into its left subtree, higher ones into its right subtree.
 *
 * Terminates the program with EXIT_FAILURE if memory cannot be allocated.
 */
struct tnode *addtree(struct tnode *p, const char *w)
{
    int cond;

    if (p == NULL) {
        /* a new word has arrived: make a new node */
        p = talloc();
        if (p == NULL) {
            out_of_memory();
        }
        p->word = strdup(w);
        if (p->word == NULL) {
            free(p);
            out_of_memory();
        }
        p->count = 1;
        p->left = NULL;
        p->right = NULL;
    } else if ((cond = strcmp(w, p->word)) == 0) {
        p->count++;                         /* repeated word */
    } else if (cond < 0) {
        p->left = addtree(p->left, w);      /* less than: left subtree */
    } else {
        p->right = addtree(p->right, w);    /* greater than: right subtree */
    }
    return p;
}

/* treeprint: in-order print of tree p, one "count word" line per node. */
void treeprint(struct tnode *p)
{
    if (p != NULL) {
        treeprint(p->left);
        printf("%4d %s\n", p->count, p->word);
        treeprint(p->right);
    }
}

/* talloc: allocate an uninitialized tree node; returns NULL on failure. */
struct tnode *talloc(void)
{
    return malloc(sizeof(struct tnode));
}

/*
 * strdup: return a malloc()ed copy of string s, or NULL if allocation
 * fails. The caller owns the result and releases it with free().
 *
 * The signature matches the POSIX/C23 prototype so that this definition
 * does not clash with a declaration coming from <string.h>.
 */
char *strdup(const char *s)
{
    size_t size = strlen(s) + 1;    /* +1 for '\0' */
    char *p = malloc(size);

    if (p != NULL) {
        memcpy(p, s, size);
    }
    return p;
}

/* treefree: release every node of tree p together with its word copy. */
static void treefree(struct tnode *p)
{
    if (p != NULL) {
        treefree(p->left);
        treefree(p->right);
        free(p->word);
        free(p);
    }
}

/* out_of_memory: report an allocation failure and terminate the program. */
static void out_of_memory(void)
{
    fprintf(stderr, "out of memory\n");
    exit(EXIT_FAILURE);
}