1. 背景
http://bbs.csdn.net/topics/390864830?page=1
2. 代码
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
typedef enum tagGender {
GENDER_MALE = 0,
GENDER_FEMALE
}Gender;
const char* genderName[] = {"Male", "Female"};
/* Size in bytes of Student.name, including the terminating NUL. */
#define MAX_LEN 32
/* One student record as entered by the user. */
typedef struct tagStudent {
int age;
/* The score. The identifier is misspelled ("scroe"), but other functions in
   this program access the field under this name. */
int scroe;
Gender gender;
char name[MAX_LEN];
}Student;
/*
 * Discard the rest of the current line on stdin.
 *
 * Reads and drops characters up to and including the next '\n', or until
 * getchar() reports end-of-file / a read error.
 */
static void clear_input_buffer(void)
{
    /* getchar() returns an int so that EOF is distinct from every byte value.
       Storing it in a char would either make byte 0xFF look like EOF or, where
       char is unsigned, make EOF impossible to detect. */
    int c;

    do {
        c = getchar();
    } while (c != '\n' && c != EOF);
}
/*
 * Prompt for the number of students and read it from stdin.
 *
 * Keeps asking until an integer in the open interval (0, 10) is entered and
 * returns that value.  On a rejected entry the rest of the line is discarded
 * before asking again.
 *
 * If stdin is exhausted (scanf returns EOF) no further input can ever arrive,
 * so the program is terminated with EXIT_FAILURE instead of retrying forever.
 */
static int get_count(void)
{
    int count = 0;
    int matched;

    printf("Please input the count of students: \n");
    for (;;) {
        matched = scanf("%d", &count);
        if (matched == EOF) {
            printf("Unexpected end of input.\n");
            exit(EXIT_FAILURE);
        }
        if (matched != 1) {
            clear_input_buffer();
            printf("Input error, try again.\n");
            continue;
        }
        if (count > 0 && count < 10) {
            return count;
        }
        clear_input_buffer();
        /* The message states the range that is actually enforced above. */
        printf("The count can be only in (0, 10).\n");
    }
}
/*
 * Print the header line of the student table.
 *
 * The column widths (16, 8, 8, 8) are the same ones output_student uses for
 * its rows, so each heading is right-aligned above its column.
 */
static void output_title(void)
{
    printf("%16s%8s%8s%8s\n", "Name", "Gender", "Age", "Score");
}
/*
 * Print one student as a table row: name (width 16), gender name, age and
 * score (width 8 each), followed by a newline.
 *
 * student->gender is used directly as an index into genderName.
 */
static void output_student(Student *student)
{
    const char *gender_text = genderName[student->gender];

    printf("%16s%8s%8d%8d\n", student->name, gender_text, student->age, student->scroe);
}
/*
 * Check that a name fits into Student.name.
 *
 * name must not be NULL (asserted).  Returns 1 when the name is shorter than
 * MAX_LEN characters; otherwise prints a message and returns 0.
 */
static int is_valid_name(char *name)
{
    assert(name != NULL);

    if (strlen(name) < MAX_LEN) {
        return 1;
    }

    printf("Name is too long. It is no more than %d. Try again!\n", MAX_LEN - 1);
    return 0;
}
/*
 * Check a gender code.
 *
 * Returns 1 for 0 or 1; for any other value prints a message and returns 0.
 */
static int is_valid_gender(int gender)
{
    switch (gender) {
    case 0:
    case 1:
        return 1;
    default:
        printf("Gender can only be 0 or 1. Try again!\n");
        return 0;
    }
}
/*
 * Check an age against the open interval (10, 30).  The range is only an
 * example value for this demo.
 *
 * Returns 1 when 10 < age < 30; otherwise prints a message and returns 0.
 */
static int is_valid_age(int age)
{
    const int too_low = (age <= 10);
    const int too_high = (age >= 30);

    if (too_low || too_high) {
        printf("Age can only be in (10, 30). Try again!\n");
        return 0;
    }
    return 1;
}
/*
 * Check a score against the closed interval [0, 100].
 *
 * Returns 1 when 0 <= score <= 100; otherwise prints a message and returns 0.
 */
static int is_valid_score(int score)
{
    if (score < 0 || score > 100) {
        printf("Score can only be in [0, 100]. Try again!\n");
        return 0;
    }
    return 1;
}
/*
 * Read one student record ("Name Gender Age Score") from stdin into *student.
 *
 * student must not be NULL (asserted).  The function keeps prompting until
 * all four items are read and pass is_valid_name / is_valid_gender /
 * is_valid_age / is_valid_score; only then is *student written.  After each
 * attempt the remainder of the input line is discarded.
 *
 * The name is read with the "%ms" conversion, which makes scanf allocate the
 * buffer; that buffer is released after every attempt, successful or not.
 *
 * If stdin is exhausted (scanf returns EOF) the program is terminated with
 * EXIT_FAILURE, because retrying could never succeed.
 */
static void input_student(Student *student)
{
    const int expect_number = 4; /* all the 4 items */
    char *name;
    int gender = 0;
    int age = 0;
    int score = 0;
    int n;

    assert(student != NULL);

    printf("Input student info: Name[32] Gender[0|1] Age Score\n");
    for (;;) {
        /* Reset each round so free() below never sees a stale pointer. */
        name = NULL;
        n = scanf("%ms %d %d %d", &name, &gender, &age, &score);
        if (n == EOF) {
            printf("Unexpected end of input.\n");
            exit(EXIT_FAILURE);
        }
        clear_input_buffer();

        if (n != expect_number) {
            printf("The input is error. Try again.\n");
        } else if (is_valid_name(name) && is_valid_gender(gender)
                   && is_valid_age(age) && is_valid_score(score)) {
            printf("Get the right data\n");
            strcpy(student->name, name); /* fits: is_valid_name checked the length */
            student->gender = gender;
            student->age = age;
            student->scroe = score;
            free(name);
            return;
        }

        /* A partial match (n >= 1) still leaves an allocated name behind. */
        free(name);
    }
}
int main(void) {
int count = 0;
Student* students = NULL;
int i;
count = get_count();
students = (Student*)malloc(sizeof(Student) * count);
if (NULL == students) {
printf("No more memory space.\n");
return 1;
}
for (i = 0; i < count; i++) {
printf("\ni = %d\n", i + 1);
input_student(students + i);
}
output_title();
for (i = 0; i < count; i++) {
output_student(students + i);
}
free(students);
students = NULL;
return 0;
}
3. 引入pcre
在上面的代码中,使用了scanf的特殊用法,并使用了清除输入缓冲区的机制。
事实上一种更好的解析方式是,直接对输入的一整行进行处理,同时分析出student的4个属性。此时,自己编写字符串处理函数是可以的,但更推荐的一种方式是,直接使用正则表达式。
一般地,对于字符串的合法性检查和解析,都不需要自己编写大量而复杂的代码,往往可以借助正则表达式。这里使用的是pcre,详见对pcre的介绍。改进后的代码如下:
/**
* gcc -Wall StudentExample-regex.c -lpcre -o student
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <pcre.h>
typedef enum tagGender {
GENDER_MALE = 0,
GENDER_FEMALE
}Gender;
const char* genderName[] = {"Male", "Female"};
/* Size in bytes of Student.name, including the terminating NUL. */
#define MAX_LEN 32
/* Initial size in bytes of the line buffer that input_student hands to getline. */
#define LINE_BUFFER_MAX 1024
/* Number of ints in the output vector passed to pcre_exec. */
#define OVECCOUNT 30 /* should be a multiple of 3 */
/* One student record as entered by the user. */
typedef struct tagStudent {
int age;
int score;
Gender gender;
char name[MAX_LEN];
}Student;
/*
 * Discard the rest of the current line on stdin.
 *
 * Reads and drops characters up to and including the next '\n', or until
 * getchar() reports end-of-file / a read error.
 */
static void clear_input_buffer(void)
{
    /* Keep the result as int: a char cannot represent EOF separately from
       the byte 0xFF, and an unsigned char can never compare equal to EOF. */
    int c = getchar();

    while (c != '\n' && c != EOF) {
        c = getchar();
    }
}
/*
 * Prompt for the number of students and read it from stdin.
 *
 * Keeps asking until an integer in the open interval (0, 10) is entered and
 * returns that value.  In every case the rest of the input line is discarded,
 * so the line-based reading that follows starts on a fresh line even if the
 * user typed extra characters after the number.
 *
 * If stdin is exhausted (scanf returns EOF) no further input can ever arrive,
 * so the program is terminated with EXIT_FAILURE instead of retrying forever.
 */
static int get_count(void)
{
    int count = 0;
    int matched;

    printf("Please input the count of students: \n");
    for (;;) {
        matched = scanf("%d", &count);
        if (matched == EOF) {
            printf("Unexpected end of input.\n");
            exit(EXIT_FAILURE);
        }
        /* Drop everything up to and including the newline, not just one char. */
        clear_input_buffer();
        if (matched != 1) {
            printf("Input error, try again.\n");
            continue;
        }
        if (count > 0 && count < 10) {
            return count;
        }
        /* The message states the range that is actually enforced above. */
        printf("The count can be only in (0, 10).\n");
    }
}
/*
 * Print the header line of the student table.
 *
 * The column widths (16, 8, 8, 8) are the same ones output_student uses for
 * its rows, so each heading is right-aligned above its column.
 */
static void output_title(void)
{
    printf("%16s%8s%8s%8s\n", "Name", "Gender", "Age", "Score");
}
/*
 * Print one student as a table row: name (width 16), gender name, age and
 * score (width 8 each), followed by a newline.
 *
 * student->gender is used directly as an index into genderName.
 */
static void output_student(Student *student)
{
    const char *gender_text = genderName[student->gender];

    printf("%16s%8s%8d%8d\n", student->name, gender_text, student->age, student->score);
}
/*
 * Check a gender code.
 *
 * Returns 1 for 0 or 1; for any other value prints a message and returns 0.
 */
static int is_valid_gender(int gender)
{
    if (gender != 0 && gender != 1) {
        printf("Gender can only be 0 or 1. Try again!\n");
        return 0;
    }
    return 1;
}
/*
 * Check an age against the open interval (10, 30).  The range is only an
 * example value for this demo.
 *
 * Returns 1 when 10 < age < 30; otherwise prints a message and returns 0.
 */
static int is_valid_age(int age)
{
    if (age <= 10 || age >= 30) {
        printf("Age can only be in (10, 30). Try again!\n");
        return 0;
    }
    return 1;
}
/*
 * Check a score against the closed interval [0, 100].
 *
 * Returns 1 when 0 <= score <= 100; otherwise prints a message and returns 0.
 */
static int is_valid_score(int score)
{
    const int in_range = (score >= 0) && (score <= 100);

    if (!in_range) {
        printf("Score can only be in [0, 100]. Try again!\n");
    }
    return in_range ? 1 : 0;
}
/*
 * Parse one input line of the form "<name> <gender> <age> <score>".
 *
 * buffer  - NUL-terminated line, not NULL (asserted).  The pattern requires
 *           whitespace after the score, e.g. the '\n' kept by getline.
 *           The buffer is not modified.
 * student - destination, not NULL (asserted).  It is written only when the
 *           line matches and every numeric field passes validation.
 *
 * The name may contain word characters and spaces; a name longer than
 * MAX_LEN - 1 characters is truncated to fit Student.name.
 *
 * Returns:
 *   0: fail (pattern compile error, no match, or a field rejected by
 *      is_valid_gender / is_valid_age / is_valid_score, which print their
 *      own messages)
 *   1: ok
 */
static int parse_student(char *buffer, Student* student)
{
    /* Numeric fields are saturated at this value.  It lies outside every
       accepted range, so an over-long digit string is rejected by the
       validators instead of overflowing an int. */
    enum { FIELD_CAP = 1000000 };

    const char *pattern = "\\s*([\\w ]+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+";
    const char *error;
    int erroffset;
    int ovector[OVECCOUNT];
    int fields[3]; /* gender, age, score */
    int name_length;
    pcre *re;
    int rc;
    int i;

    assert(buffer != NULL);
    assert(student != NULL);

    re = pcre_compile(pattern,    /* the pattern */
                      0,          /* default options */
                      &error,     /* for error message */
                      &erroffset, /* for error offset */
                      NULL);      /* use default character tables */
    if (re == NULL) {
        printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
        return 0;
    }

    rc = pcre_exec(re,                   /* the compiled pattern */
                   NULL,                 /* no extra data - we didn't study the pattern */
                   buffer,               /* the subject string */
                   (int) strlen(buffer), /* the length of the subject */
                   0,                    /* start at offset 0 in the subject */
                   0,                    /* default options */
                   ovector,              /* output vector for substring information */
                   OVECCOUNT);           /* number of elements in the output vector */

    /* ovector holds plain offsets into buffer, so the compiled pattern can
       be released before the captures are read. */
    pcre_free(re);

    if (rc < 0) {
        switch (rc) {
        case PCRE_ERROR_NOMATCH:
            printf("No match\n");
            break;
        default:
            printf("Matching error %d\n", rc);
            break;
        }
        return 0;
    }
    assert(rc > 0);

    /* Capture group g occupies ovector[2g] (start) and ovector[2g + 1] (end).
       Groups 2..4 are gender, age and score.  Each is a run of digits followed
       by whitespace, so strtol stops at the end of the group by itself. */
    for (i = 0; i < 3; i++) {
        long value = strtol(buffer + ovector[2 * (i + 2)], NULL, 10);

        fields[i] = (value > FIELD_CAP) ? FIELD_CAP : (int) value;
    }

    if (!(is_valid_gender(fields[0])
          && is_valid_age(fields[1])
          && is_valid_score(fields[2]))) {
        return 0;
    }

    /* Group 1 is the name.  Copy at most MAX_LEN - 1 bytes and terminate
       explicitly: the destination may be uninitialized memory. */
    name_length = ovector[3] - ovector[2];
    if (name_length >= MAX_LEN) {
        name_length = MAX_LEN - 1;
    }
    memcpy(student->name, buffer + ovector[2], (size_t) name_length);
    student->name[name_length] = '\0';

    student->gender = (Gender) fields[0];
    student->age = fields[1];
    student->score = fields[2];
    return 1;
}
/*
 * Read lines from stdin until one parses into a valid student record, and
 * store that record in *student.
 *
 * student must not be NULL (asserted).  Lines rejected by parse_student are
 * skipped and the next line is read.
 *
 * If getline fails (end-of-file or read error) no valid line can follow, so
 * "No data" is printed and the program is terminated with EXIT_FAILURE
 * rather than looping forever.
 */
static void input_student(Student *student)
{
    /* NULL / 0 lets getline allocate and grow the line buffer itself. */
    char *buffer = NULL;
    size_t capacity = 0;

    assert(student != NULL);

    printf("Input student info: Name[32] Gender[0|1] Age Score\n");
    for (;;) {
        if (getline(&buffer, &capacity, stdin) == -1) {
            printf("No data\n");
            free(buffer); /* getline may have allocated even though it failed */
            exit(EXIT_FAILURE);
        }
        if (parse_student(buffer, student)) {
            break;
        }
    }
    free(buffer);
}
int main(void) {
int count = 0;
Student* students = NULL;
int i;
count = get_count();
students = (Student*)malloc(sizeof(Student) * count);
if (NULL == students) {
printf("No more memory space.\n");
return 1;
}
for (i = 0; i < count; i++) {
printf("\ni = %d\n", i + 1);
input_student(students + i);
}
output_title();
for (i = 0; i < count; i++) {
output_student(students + i);
}
free(students);
students = NULL;
return 0;
}
运行示例:
flying-bird@flyingbird:~/workspace/StudentExample/src$ gcc -Wall StudentExample-regex.c -lpcre -o student
flying-bird@flyingbird:~/workspace/StudentExample/src$ ./student
Please input the count of students:
3
i = 1
Input student info: Name[32] Gender[0|1] Age Score
first
No match
first name 0 16 60
i = 2
Input student info: Name[32] Gender[0|1] Age Score
second name 1 17 70
i = 3
Input student info: Name[32] Gender[0|1] Age Score
third name 0 18 80
Name Gender Age Score
first name Male 16 60
second name Female 17 70
third name Male 18 80
flying-bird@flyingbird:~/workspace/StudentExample/src$
4. 再思考
当然,前面都是用scanf的一套数据获取机制。在实际的项目中,几乎看不到这种处理方法。更常见的是:提供一种UI界面,对象的每个属性都会有一个编辑框用于输入,而且每个编辑框设置有校验机制,比如只能输入数字。
UI是一种输入机制,往往用于新增对象。
另外一种是直接读取已经存储的数据,而这些数据往往序列化在一个文件或数据库中。——读取这个数据,就是通常所谓的反序列化。
抛开UI部分,我们把序列化以及反序列化作为练习来做。——推荐:围绕一个专题,不断地增加新的功能,作为提升C编码技能的素材。
TODO