背景
闲来无事,写了一个简单的程序,用来统计文本文件中每个单词出现的次数,特此记录。
代码
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
/* Size in bytes of the buffer that holds one line read from the input file. */
#define MAX_LINE_SIZE 256
/* Size in bytes of the per-word buffer in struct word_node. */
#define MAX_WORD_SIZE 32
/* Largest input file size, in bytes, that main() accepts (1 GiB). */
#define MAX_FILE_SIZE (1024 * 1024 * 1024)
/* One entry of the singly linked word list. */
struct word_node {
    char word[MAX_WORD_SIZE];   /* the word text, stored as a C string */
    int counter;                /* number of occurrences seen so far */
    struct word_node *next;     /* following entry in the list */
};

typedef struct word_node word_list_t;

/* Head of the global word list; NULL while the list is empty. */
word_list_t *word_list = NULL;
/*
 * Return the size in bytes of the file named by `file_name`.
 *
 * Returns -1 when `file_name` is NULL or stat() fails (for example because
 * the file does not exist). A size that does not fit in an int is reported
 * as INT_MAX, so a very large file can never wrap around and look small or
 * negative to the caller.
 */
int get_file_size(char *file_name)
{
    struct stat s;

    /* Without this check a failed stat() would leave `s` uninitialized. */
    if (file_name == NULL || stat(file_name, &s) != 0) {
        return -1;
    }

    /* st_size is an off_t, which may be wider than int: clamp, don't wrap. */
    if (s.st_size > (off_t)INT_MAX) {
        return INT_MAX;
    }

    return (int)s.st_size;
}
/*
 * Look `word` up in the global word list.
 *
 * When a node with the same text is found, its counter is incremented as a
 * side effect and 1 is returned. Returns 0 when no node holds the word.
 */
int is_existed(char *word)
{
    word_list_t *node = word_list;

    /* walk the list until a node with matching text turns up */
    while (node != NULL) {
        if (!strcmp(word, node->word)) {
            node->counter++;
            return 1;
        }
        node = node->next;
    }

    return 0;
}
/*
 * Append a new node holding `word`, with a counter of 1, to the end of the
 * global word list.
 *
 * Returns 0 on success. Returns -1, leaving the list unchanged, when `word`
 * is NULL, when it does not fit in the node's fixed-size buffer (terminator
 * included), or when the allocation fails.
 */
int create_new_word(char *word)
{
    word_list_t *new_node = NULL;
    word_list_t *last = NULL;
    size_t len;

    if (word == NULL) {
        return -1;
    }

    /* Reject anything that would overflow the buffer, NUL byte included. */
    len = strlen(word);
    if (len >= sizeof new_node->word) {
        printf("the word[%s] is too long.\n", word);
        return -1;
    }

    new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        printf("malloc error");
        return -1;
    }

    memcpy(new_node->word, word, len + 1);
    new_node->counter = 1;
    /* malloc() does not zero memory; every list walk relies on this NULL. */
    new_node->next = NULL;

    if (word_list == NULL) {
        word_list = new_node;
        return 0;
    }

    /* find the current last node and hook the new one behind it */
    for (last = word_list; last->next != NULL; last = last->next) {
        ;
    }
    last->next = new_node;

    return 0;
}
/*
 * Sort part of a singly linked list into ascending `counter` order by
 * relinking nodes (quicksort, pivot = first node of the range).
 *
 * The sorted range is the nodes strictly between `head` and `tail`:
 * `head` acts as a sentinel whose `next` field is rewritten to point at the
 * new first node, and `head` itself is never moved or compared. `tail` is
 * the node (or NULL) that follows the range. To sort a whole list, pass a
 * dummy node whose `next` is the first real node, and NULL as `tail`.
 *
 * A NULL `head`, an empty range, or a one-node range is left untouched.
 * Always returns 0.
 *
 * NOTE: the function recurses once per partition; on input that is already
 * ordered each call peels off one node, so the recursion depth grows with
 * the length of the list.
 */
int quick_sort(word_list_t *head, word_list_t *tail)
{
    int pivot_count;
    word_list_t *pivot = NULL;
    word_list_t *less_end = NULL;   /* last node of the "< pivot" chain  */
    word_list_t *rest_end = NULL;   /* last node of the ">= pivot" chain */
    word_list_t *cur = NULL;

    /* Nothing to do for a missing sentinel or fewer than two nodes. */
    if (head == NULL || head->next == NULL) {
        return 0;
    }
    if (head->next == tail || head->next->next == tail) {
        return 0;
    }

    pivot = head->next;
    pivot_count = pivot->counter;
    less_end = head;    /* the "< pivot" chain grows right behind head  */
    rest_end = pivot;   /* the ">= pivot" chain grows right behind pivot */

    /*
     * Appending `cur` to either chain only rewrites the `next` field of an
     * earlier node, so `cur->next` still leads to the unvisited remainder.
     */
    for (cur = pivot->next; cur != tail; cur = cur->next) {
        if (cur->counter < pivot_count) {
            less_end->next = cur;
            less_end = cur;
        } else {
            rest_end->next = cur;
            rest_end = cur;
        }
    }

    /* Stitch together: head -> smaller nodes -> pivot -> the rest -> tail. */
    less_end->next = pivot;
    rest_end->next = tail;

    quick_sort(head, pivot);
    quick_sort(pivot, tail);

    return 0;
}
/*
 * Print a header followed by one line per node of the global word list:
 * the word left-aligned in a 32-column field, then its counter.
 * Always returns 0.
 */
int show_words()
{
    word_list_t *node = word_list;

    /* table header */
    printf("the words list:\n");
    printf("word counters\n");
    printf("----------------------------------------\n");

    /* one row per word, in list order */
    while (node != NULL) {
        printf("%-32s %4d\n", node->word, node->counter);
        node = node->next;
    }

    return 0;
}
int main(char argc, char **argv)
{
int ret = -1;
FILE *fp = NULL;
char *file_name = NULL;
char line[MAX_LINE_SIZE];
char *delim = ".,! ";
char *token = NULL;
char *saveptr = NULL;
char *str = NULL;
/* check args */
if (argc != 2){
printf("Usage: %s filename\n", argv[0]);
return -1;
}
/* check size of file */
file_name = argv[1];
if (get_file_size(file_name) > MAX_FILE_SIZE){
printf("the size of file too large.\n");
return -1;
}
/* open file */
fp = fopen(file_name, "r");
if (fp == NULL){
printf("open file error.\n");
return -1;
}
/* read words line by line */
while (fgets(line, MAX_LINE_SIZE, fp) != NULL){
/* delete the '\n' of line */
line[strlen(line)-1] = '\0';
str = line;
/* parse words in this line */
while (1){
token = strtok_r(str, delim, &saveptr);
if (token == NULL){
break;
}
str = NULL;
/* check the length of word */
if (strlen(token) > MAX_WORD_SIZE){
printf("the word[%s] is too long.\n", token);
return -1;
}
/* if existed, update counters of the word */
if (is_existed(token)){
;
}
/* not, create new node to store the word */
else{
create_new_word(token);
}
}
}
/* sort by ascending order */
quick_sort(word_list, NULL);
/* show words infor */
show_words();
return 0;
}