C语言构建定长字典
1.C语言简介-struct
C语言标准库中没有内置的字典(键值映射)类型。而在有些场合下需要统计一些字符串的累计出现次数,这时就可以使用定长字典。定长字典顾名思义就是容量固定的字典。在C语言中,结构体可以定义任意类型数据的集合,因此可以通过结构体数组来构造一个定长字典。
2.词频字典
2.1词频字典结构体
/*
 * Word-frequency dictionary entry: one word plus the number of times
 * it has been seen.
 */
#define WORD_MAX_SIZE 100
typedef struct word_fre_dict {
    char word[WORD_MAX_SIZE]; /* word text; the buffer is WORD_MAX_SIZE bytes */
    int times;                /* occurrence count; assumed not to exceed 2^31 - 1 */
} WORD_FRE_DICT;
2.2 词频字典的排序
/*
 * Comparator suitable for qsort: orders dictionary entries by word,
 * using strcmp ordering.
 *
 * a, b: pointers to WORD_FRE_DICT elements.
 * Returns the strcmp result for the two words (negative, zero or positive).
 */
int cmp_dict(const void *a, const void *b)
{
    const WORD_FRE_DICT *lhs = a;
    const WORD_FRE_DICT *rhs = b;

    return strcmp(lhs->word, rhs->word);
}
/*
 * Comparator suitable for qsort: orders dictionary entries by ascending
 * occurrence count.
 *
 * a, b: pointers to WORD_FRE_DICT elements.
 * Returns -1, 0 or 1 when a's count is less than, equal to or greater
 * than b's count.
 */
int cmp_fre_up(const void *a, const void *b)
{
    const WORD_FRE_DICT *pa = a;
    const WORD_FRE_DICT *pb = b;

    /* Compare instead of subtracting: a difference of two ints can overflow. */
    return (pa->times > pb->times) - (pa->times < pb->times);
}
/*
 * Comparator suitable for qsort: orders dictionary entries by descending
 * occurrence count.
 *
 * a, b: pointers to WORD_FRE_DICT elements.
 * Returns -1, 0 or 1 when a's count is greater than, equal to or less
 * than b's count.
 */
int cmp_fre_down(const void *a, const void *b)
{
    const WORD_FRE_DICT *pa = a;
    const WORD_FRE_DICT *pb = b;

    /* Compare instead of subtracting: a difference of two ints can overflow. */
    return (pb->times > pa->times) - (pb->times < pa->times);
}
2.3 从字符串文本中提取词频字典
/*
 * Build a word-frequency dictionary from `paragraph` and print it.
 *
 * The text is normalized in place: ASCII upper-case letters are folded to
 * lower case and every other non-letter byte becomes a space. It is then
 * split on spaces with strtok, which writes NUL terminators into the
 * buffer, so `paragraph` must be writable and is modified by this call.
 * Each token is echoed on its own line as it is read; afterwards the
 * distinct words are sorted with cmp_dict and printed with their counts.
 *
 * At most DICT_CAPACITY distinct words are recorded; further new words are
 * ignored. Tokens longer than an entry's word buffer are truncated to fit.
 * A NULL paragraph, or one containing no letters, produces no output.
 */
void create_word_fre_dict(char* paragraph)
{
    enum { DICT_CAPACITY = 1000 };
    WORD_FRE_DICT word_list[DICT_CAPACITY] = { 0 };
    const size_t word_cap = sizeof word_list[0].word;
    size_t word_count = 0; /* number of entries of word_list in use */

    if (paragraph == NULL) {
        return;
    }

    /* Preprocessing: lower-case every letter, blank out everything else. */
    for (size_t i = 0; paragraph[i] != '\0'; i++) {
        char c = paragraph[i];
        if (c >= 'A' && c <= 'Z') {
            /* tolower returns the converted value; it must be stored back. */
            paragraph[i] = (char)tolower((unsigned char)c);
        } else if (c < 'a' || c > 'z') {
            paragraph[i] = ' ';
        }
    }

    /* Split on spaces; strtok returns NULL once the tokens are exhausted. */
    for (char *token = strtok(paragraph, " "); token != NULL;
         token = strtok(NULL, " ")) {
        printf("%s\n", token);

        /*
         * Clamp over-long tokens before lookup so the stored and the
         * compared text agree. strtok has already advanced past this
         * token, so shortening it in place does not disturb the scan.
         */
        if (strlen(token) >= word_cap) {
            token[word_cap - 1] = '\0';
        }

        /* Linear search over the entries in use (index checked first). */
        size_t slot = 0;
        while (slot < word_count &&
               strcmp(word_list[slot].word, token) != 0) {
            slot++;
        }

        if (slot < word_count) {
            word_list[slot].times++;
        } else if (word_count < (size_t)DICT_CAPACITY) {
            strcpy(word_list[word_count].word, token); /* fits: clamped above */
            word_list[word_count].times = 1;
            word_count++;
        }
        /* else: dictionary is full, the new word is dropped */
    }

    /* Sort and print exactly the entries that were filled in. */
    qsort(word_list, word_count, sizeof word_list[0], cmp_dict);
    for (size_t de = 0; de < word_count; de++) {
        printf("%s, count = %d\n", word_list[de].word, word_list[de].times);
    }
}
/*
 * Demo driver: prints a sample English passage, then passes it to
 * create_word_fre_dict. Always returns 0.
 */
int c_dict_test(void)
{
    /* Stored in a local array because the buffer is handed over as a char *. */
    char passage[] =
        "Youth means a temperamental predominance of courage over timidity, "
        "of the appetite for adventure over the love of ease. This often "
        "exits in a man of 60, more than a boy of 20.nobody grows merely "
        "by the number of years; we grow old by deserting our ideas. Years "
        "may wrinkle the skin, but to give up enthusiasm wrinkles the soul. "
        "Worry, fear, self distrust bows the heart and turns the spirit "
        "back to dust.";

    printf("%s\n", passage);
    create_word_fre_dict(passage);

    return 0;
}