【C语言基础】C语言构建定长字典

C语言构建定长字典

1.C语言简介-struct

C语言中没有内置的字典类型。而在有些场合下需要统计一些字符串的累计值(例如词频),这时就可以使用定长字典。定长字典顾名思义就是容量固定的字典。在C语言中,结构体可以把不同类型的数据组合在一起,因此可以通过结构体数组来构造一个定长字典。

2.词频字典

2.1词频字典结构体

/* Word-frequency dictionary entry: one word together with its occurrence count. */

#define WORD_MAX_SIZE 100
typedef struct word_fre_dict {
    /* Word text. The buffer is WORD_MAX_SIZE bytes, so a NUL-terminated
     * word can be at most WORD_MAX_SIZE - 1 characters long. */
    char word[WORD_MAX_SIZE];
    /* Number of occurrences; assumed to stay within the range of int
     * (the original note puts the limit at 2^31 - 1 per text). */
    int times;
} WORD_FRE_DICT;



2.2 词频字典的排序

/* 可以用qsort函数进行排序 */

/*
 * qsort comparator: orders dictionary entries by their word, using
 * strcmp ordering. Returns the strcmp result for the two words.
 */
int cmp_dict(const void *a, const void *b)
{
    const WORD_FRE_DICT *lhs = a;
    const WORD_FRE_DICT *rhs = b;

    return strcmp(lhs->word, rhs->word);
}
/*
 * qsort comparator: orders dictionary entries by ascending occurrence count.
 *
 * Returns -1 if a's count is smaller than b's, 1 if it is larger, and 0 if
 * the counts are equal.
 */
int cmp_fre_up(const void *a, const void *b)
{
    const WORD_FRE_DICT *pa = a;
    const WORD_FRE_DICT *pb = b;

    /* Compare instead of subtracting: the difference of two ints can
     * overflow (undefined behaviour) when the operands have opposite signs. */
    return (pa->times > pb->times) - (pa->times < pb->times);
}
/*
 * qsort comparator: orders dictionary entries by descending occurrence count.
 *
 * Returns -1 if a's count is larger than b's, 1 if it is smaller, and 0 if
 * the counts are equal.
 */
int cmp_fre_down(const void *a, const void *b)
{
    const WORD_FRE_DICT *pa = a;
    const WORD_FRE_DICT *pb = b;

    /* Compare instead of subtracting: the difference of two ints can
     * overflow (undefined behaviour) when the operands have opposite signs. */
    return (pb->times > pa->times) - (pb->times < pa->times);
}

2.3 从字符串文本中提取词频字典

void create_word_fre_dict(char* paragraph)
{
    WORD_FRE_DICT word_list[1000] = { 0 };
    memset(word_list, 0, 1000 * sizeof(WORD_FRE_DICT));
    /* 预处理: 单词全改为小写, 非单词全转为空格符 */
    int i = 0;
    while (paragraph[i] != 0) {
        if (paragraph[i] >= 'A' && paragraph[i] <= 'Z') {
            tolower(paragraph[i]);
        }
        else if ((paragraph[i] < 'A' || paragraph[i] > 'Z') && \
            ((paragraph[i] < 'a' || paragraph[i] > 'z'))) {
            paragraph[i] = ' ';
        }
        i++;
    }

    /* 使用strtok公式进行字符串拆分 */
    char* temp = strtok(paragraph, " ");
    strcpy(word_list[0].word, temp);
    word_list[0].times++;

    while (temp != NULL) {
        temp = strtok(NULL, " "); /* 最后一个字节拿出来后还会执行一次,这次结果为NULL */
        if (temp == NULL) {
            break;
        }
        printf("%s\n", temp);
        int dict_index = 0;
        int find = 0;
        while (word_list[dict_index].times != 0 && dict_index < 1000) {
            if (strcmp(word_list[dict_index].word, temp) == 0) {
                word_list[dict_index].times++;
                find = 1;
                break;
            }
            dict_index++;
        }

        if (!find && dict_index < 1000) {
            strcpy(word_list[dict_index].word, temp);
            word_list[dict_index].times = 1;
        }
    }
	qsort(word_list, 50, sizeof(WORD_FRE_DICT), cmp_dict);
    for (int de = 0; de < 20; de++) {
        printf("%s, count = %d\n", word_list[de].word, word_list[de].times);
    }
}

/*
 * Demo driver: prints a sample paragraph, then passes it to
 * create_word_fre_dict. Always returns 0.
 *
 * The text is stored in a local array rather than referenced through a
 * pointer to a string literal because create_word_fre_dict writes into its
 * argument. Note that the backslash line continuations make the leading
 * whitespace of each continued line part of the string.
 */
int c_dict_test(void)
{
    char sample_text[] = "Youth means a temperamental predominance of courage over timidity, \
                        of the appetite for adventure over the love of ease. This often \
                        exits in a man of 60, more than a boy of 20.nobody grows merely \
                        by the number of years; we grow old by deserting our ideas. Years \
                        may wrinkle the skin, but to give up enthusiasm wrinkles the soul. \
                        Worry, fear, self distrust bows the heart and turns the spirit \
                        back to dust.";

    printf("%s\n", sample_text);
    create_word_fre_dict(sample_text);

    return 0;
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值