最近在学习iniparser的源码,发现了C语言处理字符串的利器(sscanf)!
具体的源码如下:
函数功能:解析一行字符串,识别出 节名,键名,键值。
/*-------------------------------------------------------------------------*/
/**
  @brief    Classify one line of an INI file and extract its contents
  @param    input_line  Line to analyse; may be several physical lines
                        that were already joined into one
  @param    section     Output buffer receiving the section name
  @param    key         Output buffer receiving the key
  @param    value       Output buffer receiving the value
  @return   line_status value describing the kind of line found

  The input is never modified: all work happens on a copy obtained from
  xstrdup(), which is released before returning. Only the output buffers
  relevant to the detected line kind are written; the sscanf() patterns
  carry no field widths, so the caller must supply buffers large enough
  for the whole line.
 */
/*--------------------------------------------------------------------------*/
static line_status iniparser_line(
    const char * input_line,
    char * section,
    char * key,
    char * value)
{
    /* How a "key = value" line was recognised, if at all. */
    enum { KV_NONE, KV_QUOTED, KV_BARE, KV_BLANK } form = KV_NONE;

    line_status result = LINE_UNPROCESSED;
    char       *work   = xstrdup(input_line);  /* private, writable copy */
    /* strstrip() is defined elsewhere; its return value is used as the
     * length of the stripped line (presumably blanks are trimmed). */
    size_t      n      = strstrip(work);

    if (n < 1) {
        /* Nothing left on the line */
        result = LINE_EMPTY;
    } else if (work[0] == '#' || work[0] == ';') {
        /* Line starts with a comment marker */
        result = LINE_COMMENT;
    } else if (work[0] == '[' && work[n - 1] == ']') {
        /* "[name]": skip the '[' and copy everything up to the first ']' */
        sscanf(work, "[%[^]]", section);
        strstrip(section);
        strlwc(section, section, n);   /* per the original notes: lower-cases */
        result = LINE_SECTION;
    } else {
        /*
         * Try the key/value layouts from most to least specific. The order
         * matters: every attempt may overwrite key and value, and the last
         * attempt made is the one whose captures are kept.
         */
        if (sscanf(work, "%[^=] = \"%[^\"]\"", key, value) == 2
            || sscanf(work, "%[^=] = '%[^\']'", key, value) == 2) {
            /* Value enclosed in double or single quotes */
            form = KV_QUOTED;
        } else if (sscanf(work, "%[^=] = %[^;#]", key, value) == 2) {
            /* Unquoted value, cut at a trailing ';' or '#' comment */
            form = KV_BARE;
        } else if (sscanf(work, "%[^=] = %[;#]", key, value) == 2
                   || sscanf(work, "%[^=] %[=]", key, value) == 2) {
            /* Special cases "key=", "key=;" and "key=#" */
            form = KV_BLANK;
        }

        if (form == KV_NONE) {
            /* No layout matched: report a syntax error */
            result = LINE_ERROR;
        } else {
            strstrip(key);
            strlwc(key, key, n);

            if (form == KV_BARE) {
                strstrip(value);
                /* sscanf cannot yield an empty string for '' or "",
                 * so those two spellings are mapped to "" here. */
                if (strcmp(value, "\"\"") == 0 || strcmp(value, "''") == 0) {
                    value[0] = 0;
                }
            } else if (form == KV_BLANK) {
                value[0] = 0;
            }
            /* KV_QUOTED: the value is kept verbatim, spaces included */

            result = LINE_VALUE;
        }
    }

    free(work);
    return result;
}
下面咱们就来深度地剖析一下该神器。其中 %[...] 的写法叫作扫描集(scanset),语法类似正则表达式里的字符集,但 sscanf 并不支持完整的正则表达式
这边先列出一些基本的规则,我们之后一一验证
1. %[^=] ; %[^A-Z] ; %[^;#] –取出特殊符号集(等号、A-Z、;#)之前的字符串
2. [% ; a% ; @#% –格式串中 % 之前的普通字符([、 a、 @#)必须与输入逐字匹配,匹配后被跳过(过滤);注意普通字符不支持 a-c 这样的范围写法,范围只能写在 %[...] 里面
3. %*s ; %*[0-9] ; –跳过(过滤)匹配到的内容(任意非空白字符串、 0-9),加了 * 的转换不需要对应的存储参数
4. %[0-9] ; –取出匹配特殊符号集的字符串(0-9)
总结为, 左边为需要过滤的内容 << % >> 右边为需要取出的内容
测试代码如下:
/*****************************************************
* ** Name : ini_sscanf.c
* ** Author : lamar
* ** Version : 1.0
* ** Date : 2018-03
* ** Description : test sscanf function
* ******************************************************/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Run several sscanf() patterns against fixed sample strings and print the
 * return value plus the captured fields for each one.
 *
 * Every storing %[...] / %s conversion carries a field width of 1023, one
 * less than the 1024-byte destination, so a longer input can never overrun
 * a buffer. The buffers start out empty so the printf() calls stay well
 * defined even if a pattern were to match fewer fields than expected.
 */
static void ini_sscanf(void)
{
    int ret;
    const char *str;            /* only ever points at string literals */
    char buf_1[1024] = "";
    char buf_2[1024] = "";
    char buf_3[1024] = "";

    /* Key up to '=', then a double-quoted value. */
    str = "Year=\"2018\"";
    ret = sscanf(str, "%1023[^=] = \"%1023[^\"]\"", buf_1, buf_2);
    printf("ret:%d, buf_1:%s buf_2:%s \n", ret, buf_1, buf_2);
    /* Output: ret:2, buf_1:Year buf_2:2018 */

    /* A literal 'a' in the format must match the input and is skipped. */
    str = "abcYear=@#2018";
    ret = sscanf(str, "a%1023[^=]=%1023s", buf_1, buf_2);
    printf("ret:%d, buf_1:%s buf_2:%s \n", ret, buf_1, buf_2);
    /* Output: ret:2, buf_1:bcYear buf_2:@#2018 */

    /* %[a-z] stops at the first character outside the set ('O'). */
    str = "abOcYear=@#2018";
    ret = sscanf(str, "%1023[a-z] %1023[^0-9]", buf_1, buf_3);
    printf("ret:%d, buf_1:%s buf_3:%s \n", ret, buf_1, buf_3);
    /* Output: ret:2, buf_1:ab buf_3:OcYear=@# */

    /* Literal prefix "abc=", digits, then everything before 'r'. */
    str = "abc=2018 year";
    ret = sscanf(str, "abc=%1023[0-9] %1023[^r]", buf_1, buf_2);
    printf("ret:%d, buf_1:%s buf_2:%s \n", ret, buf_1, buf_2);
    /* Output: ret:2, buf_1:2018 buf_2:yea */

    /* The '*' flag discards the match: nothing is stored, no argument used. */
    str = "abc=2018 @@#lamar";
    ret = sscanf(str, "%*[^@] %1023[@#] %1023[a-z]", buf_1, buf_2);
    printf("ret:%d, buf_1:%s buf_2:%s \n", ret, buf_1, buf_2);
    /* Output: ret:2, buf_1:@@# buf_2:lamar */
}
/* Entry point: run the sscanf demonstrations, then announce completion. */
int main()
{
    ini_sscanf();
    fputs("end the main function\n", stdout);
    return EXIT_SUCCESS;
}
真乃解析字符串之利器!