// Counts the words in a text file with a simple state machine; the set of delimiters can be customized in splite().
#include <stdio.h>
#include <ctype.h>
// Scanner states: OUT means the last character seen was a delimiter (not inside a word).
#define OUT 0
// IN means the scanner is currently inside a word.
#define IN 1
// State the scanner starts in before any character has been read.
#define INIT OUT
// Reports whether c is a word delimiter: returns 1 for a space, newline,
// tab, single quote or double quote, and 0 for every other character.
// Add further case labels below to treat more characters as delimiters.
int splite(char c){
    switch(c){
    case ' ':
    case '\n':
    case '\t':
    case '\'':
    case '\"':
        return 1;
    default:
        return 0;
    }
}
// Counts the words in the file named by filename.
// A word starts at every non-delimiter character (as decided by splite())
// that is read while the scanner is in the OUT state.
// Returns the number of words, or -1 if the file cannot be opened.
int count_word(char *filename){
    FILE *fp = fopen(filename, "r");
    if(fp == NULL){
        return -1;
    }

    int status = INIT;
    int word = 0;
    // fgetc() returns an int so that EOF can be told apart from every valid
    // byte; storing it in a char would either mistake byte 0xFF for EOF or
    // never compare equal to EOF, depending on the signedness of char.
    int c;
    int prev = ' '; // previous character that was processed

    while((c = fgetc(fp)) != EOF){
        // A line break is sometimes preceded by a hyphen that splits a word;
        // skip that newline so the two halves are counted as one word.
        // prev is not updated here, so it stays '-' for the next character.
        if(('-' == prev) && ('\n' == c)){
            continue;
        }
        if(splite((char)c)){
            status = OUT;
        }else if(OUT == status){
            // First character of a new word.
            status = IN;
            word++;
        }
        prev = c;
    }

    // Release the stream; the original leaked it on every successful call.
    fclose(fp);
    return word;
}
// Entry point: prints the number of words in the file named by argv[1].
// Returns 0 on success, or -1 when no file argument was given or when
// count_word() reports a failure.
int main(int argc, char *argv[]){
    if(argc < 2){
        // argv[0] may be absent when argc is 0, so fall back to a fixed name.
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "wordcount");
        return -1;
    }

    int words = count_word(argv[1]);
    if(words < 0){
        // count_word() returns -1 when the file cannot be opened; report it
        // as an error instead of printing -1 as if it were a word count.
        fprintf(stderr, "cannot open file: %s\n", argv[1]);
        return -1;
    }

    printf("word : %d \n", words);
    return 0;
}