C语言二叉树字符统计,C语言实现二叉树-利用二叉树统计单词数目

最新推荐文章于 2021-05-17 16:15:13 发布

SCP Hologram

最新推荐文章于 2021-05-17 16:15:13 发布

阅读量384

点赞数

文章标签： C语言二叉树字符统计

我需要统计的单词是在程序直接硬编码的；

这样做得原因是省略了文件输入输出所带来的困惑；

我的每篇文章，一般只说一个主题；

这样也方便我日后复习；

Solution

首先，我们需要定义一个结构体，如下代码所示：

const int LONGEST_WORD = 32; // The longest word size

struct binary_tree {

char str[LONGEST_WORD];

int count;

struct binary_tree * left;

struct binary_tree * right;

};

typedef struct binary_tree node;

注意到，我们假设最长的单词定义为一个常量，在这里我觉得目前32这个长度应该可以啦；

如果要统计的文章是化学论文，建议你再加大数字，因为化学式通常都很长；

然后是，我们的结构体；这应该很容易理解的；

由于C语言没有提供我想要的BOOL类型，因此自己动手写啦下面的代码；

这个定义非常有用，通常它比define更加值得推荐；

enum BOOL {

NO,

YES

};

typedef enum BOOL BOOL;

接下来，我们需要知道单词之间是如何比较大小的；

因此，需要一个函数叫做cmp；

代码实现如下：

BOOL cmp(char * s, char * t)

{

int i;

for (i = 0; s[i] == t[i]; i++)

if ( s[i] == '\0' )

return NO;

return (s[i] - t[i]) < 0 ? NO:YES;

}

同时遍历两个字符串，然后对返回值进行一个处理；

这样只会返回两种情况NO/YES，不然的话会返回三种值(-1，0，正数);

那样的话，不利于我们往后的工作；

接下来呢，就是如果返回YES我们该(如何) (做什么)；

如果返回NO我们又该(如何)(做什么)；

因此，我们需要一个insert函数，把数据的两种不同分别插入左右子树；

void insert(node ** tree, char * val) {

node * temp = NULL;

if(!(*tree)) {

temp = (node*)malloc(sizeof(node));

temp->left = temp->right = NULL;

temp->str = val; //issue code ...

temp->count = 1;

*tree = temp;

return ;

}

if(cmp(val, (*tree)->str)) {

insert(&(*tree)->left,val);

}else if (cmp(val,(*tree)->str)) {

insert(&(*tree)->right,val);

}else{

(*tree)->count++;

}

这段代码和前面提到的(C语言实现二叉树)里面的代码几乎一样的，哪里有详细介绍；

这里主要讲解一下注释有issue code的那行，如果这行不修改，程序将会蹦溃；

但是，我会故意不马上修改它，继续往下写；

我们需要一个函数，销毁节点：

void deltree(node * tree) {

if(tree) {

deltree(tree->left);

deltree(tree->right);

free(tree);

}

为了查看我们的结果，我们需要一种遍历方式；

这里我们就选择中序吧！

void print_inorder(node * tree) {

if(tree) {

print_inorder(tree->left);

printf("[%s\t\t\t]count:[%d]\n",tree->str,tree->count);

print_inorder(tree->right);

}

我们把头文件stdio.h/stdlib.h引入后；

把主int main(int argc, char ** arg{

node * root;

node * tmp;

//int i;

root = NULL;

/* Inserting nodes into tree */

insert(&root,"hello");

insert(&root,"hey");

insert(&root,"hello");

insert(&root,"ok");

insert(&root,"hey");

insert(&root,"hey")；

printf("In Order Display\n");

print_inorder(root);/* Deleting all nodes of tree */

deltree(root);

}

gcc编译运行得到如下结果：

果然，我们的issue code有问题，原因是字符串不能像其他的，例如int类型一样直接用‘=’号赋值；

所以我们需要一个cpy函数：

void mystrcpy(char *s, char *t)

{

while ((*s++ = *t++) != '\0')

;

}

所有代码如下：

#include

const int LONGEST_WORD = 32; // The longest word size

struct binary_tree {

char str[LONGEST_WORD];

int count;

struct binary_tree * left;

struct binary_tree * right;

};

typedef struct binary_tree node;

enum BOOL {

NO,

YES

};

typedef enum BOOL BOOL;

BOOL cmp(char * s, char * t)

{

int i;

for (i = 0; s[i] == t[i]; i++)

if ( s[i] == '\0' )

return NO;

return (s[i] - t[i]) < 0 ? NO:YES;

}

void mystrcpy(char *s, char *t)

{

while ((*s++ = *t++) != '\0')

;

}

void insert(node ** tree, char * val) {

node * temp = NULL;

if(!(*tree)) {

temp = (node*)malloc(sizeof(node));

temp->left = temp->right = NULL;

//temp->str = val; //issue code ...

mystrcpy(temp->str,val);

temp->count = 1;

*tree = temp;

return ;

}

if(cmp(val, (*tree)->str)) {

insert(&(*tree)->left,val);

}else if (cmp(val,(*tree)->str)) {

insert(&(*tree)->right,val);

}else{

(*tree)->count++;

}

void deltree(node * tree) {

if(tree) {

deltree(tree->left);

deltree(tree->right);

free(tree);

}

void print_inorder(node * tree) {

if(tree) {

print_inorder(tree->left);

printf("[%s\t\t\t]count:[%d]\n",tree->str,tree->count);

print_inorder(tree->right);

}

int main(int argc, char ** argv)

{

node * root;

node * tmp;

//int i;

root = NULL;

/* Inserting nodes into tree */

insert(&root,"hello");

insert(&root,"hey");

insert(&root,"hello");

insert(&root,"ok");

insert(&root,"hey");

printf("In Order Display\n");

print_inorder(root);

/* Deleting all nodes of tree */

deltree(root);

}

最后运行结果如下：

Discussion

那么这个程序已经完成啦！

还有很多可以优化的，也可以增加更多的功能；

例如，查找特定字符出现的次数；

或者特定字符所出现的行数，等等都可以；

我们会在日后慢慢完善；