简介
阅读 Snort 源码时,经常见到 mSplit 的使用,因此阅读了该部分代码并在此做一记录。
代码
/****************************************************************
 *
 * Function: mSplit()
 *
 * Purpose: Splits a string into tokens non-destructively.
 *
 * Parameters:
 *  str        The string to be split.
 *  sep_chars  A string of token separators.  NULL selects the
 *             default separators " \t"; an empty string is
 *             rejected.
 *  max_toks   The maximum number of tokens to be returned.  A
 *             value of 0 means to get them all.  Negative values
 *             are treated like 0.
 *  num_toks   Place to store the number of tokens returned.  It
 *             is set to 0 whenever NULL is returned.
 *  meta_char  The "escape metacharacter": the character after it
 *             is taken literally, which lets a separator appear
 *             inside a token.  It must not be one of sep_chars.
 *
 * Notes:
 *  - Whitespace (as defined by isspace()) around tokens is
 *    skipped/trimmed in addition to the separators.
 *  - If max_toks is reached, the last token is the untouched rest
 *    of the string (minus trailing whitespace, except when
 *    max_toks is 1), so it may still contain separator and meta
 *    characters.
 *
 * Returns:
 *  A heap-allocated array with one heap-allocated token per
 *  entry, or NULL if an argument is invalid, the string contains
 *  nothing but separators/whitespace, or the string ends with an
 *  unpaired meta character.  The caller owns the result
 *  (presumably released with mSplitFree() - confirm against its
 *  definition).
 *
 ****************************************************************/

/* Non-zero if c is one of the sep_len separator characters in sep_chars. */
static int mSplitIsSep(const char c, const char *sep_chars, const size_t sep_len)
{
    return memchr(sep_chars, c, sep_len) != NULL;
}

/* Returns the first index >= pos whose character is neither a separator
 * nor whitespace, or len if there is no such character. */
static size_t mSplitSkipSeps(const char *str, size_t pos, const size_t len,
                             const char *sep_chars, const size_t sep_len)
{
    /* The unsigned char cast keeps isspace() defined for bytes >= 0x80 */
    while ((pos < len) &&
           (isspace((unsigned char)str[pos]) ||
            mSplitIsSep(str[pos], sep_chars, sep_len)))
    {
        pos++;
    }

    return pos;
}

/* Returns end moved left past any whitespace, never going below start. */
static size_t mSplitTrimEnd(const char *str, const size_t start, size_t end)
{
    while ((end > start) && isspace((unsigned char)str[end - 1]))
        end--;

    return end;
}

char ** mSplit(const char *str, const char *sep_chars, const int max_toks,
               int *num_toks, const char meta_char)
{
    size_t cur_tok = 0;       /* current token index into array of strings */
    size_t tok_start;         /* index to start of token */
    size_t tok_end;           /* one past the last character of a token */
    size_t i, k;
    size_t str_len, sep_len;  /* cached so the scan stays linear */
    int escaped = 0;

    /* It's rare we'll need more than this even if max_toks is set really
     * high.  Store toks here until finished, then allocate.  If more than
     * this is necessary, then allocate max toks */
    char *toks_buf[TOKS_BUF_SIZE];
    size_t toks_buf_size = TOKS_BUF_SIZE;
    const size_t toks_buf_size_increment = 10;
    char **toks_alloc = NULL;   /* Used if the stack buf isn't enough */
    char **toks = toks_buf;     /* Pointer to one of the two above */
    char **retstr;

    /* 0 means "no limit".  Negative requests are folded into 0 so they can
     * never be used as an allocation size when the buffer has to grow. */
    const size_t tok_limit = (max_toks > 0) ? (size_t)max_toks : 0;

    if (num_toks == NULL)
        return NULL;

    *num_toks = 0;

    /* Nothing to do for a missing/empty input string or an explicitly
     * empty separator set */
    if ((str == NULL) || (str[0] == '\0') ||
        ((sep_chars != NULL) && (sep_chars[0] == '\0')))
    {
        return NULL;
    }

    /* No separators given: split on blanks and tabs */
    if (sep_chars == NULL)
        sep_chars = " \t";

    str_len = strlen(str);
    sep_len = strlen(sep_chars);

    /* Meta char cannot also be a separator char */
    if (mSplitIsSep(meta_char, sep_chars, sep_len))
        return NULL;

    /* Move past initial separator characters and whitespace */
    i = mSplitSkipSeps(str, 0, str_len, sep_chars, sep_len);
    if (i == str_len)
    {
        /* Nothing but separator characters or whitespace in string */
        return NULL;
    }

    /* User only wanted one tok so return the rest of the string in
     * one tok */
    if (tok_limit == 1)
    {
        retstr = (char **)SnortAlloc(sizeof(char *));
        retstr[0] = SnortStrndup(&str[i], str_len - i);
        if (retstr[0] == NULL)
        {
            mSplitFree(&retstr, cur_tok + 1);
            return NULL;
        }

        *num_toks = 1;
        return retstr;
    }

    /* Mark the beginning of the first tok */
    tok_start = i;

    while (i < str_len)
    {
        if (escaped)
        {
            /* This character is escaped with the meta char */
            escaped = 0;
            i++;
            continue;
        }

        /* Got an escape character.  Don't include it now, but there
         * must be a character after it. */
        if (str[i] == meta_char)
        {
            escaped = 1;
            i++;
            continue;
        }

        /* It's a normal character */
        if (!mSplitIsSep(str[i], sep_chars, sep_len))
        {
            i++;
            continue;
        }

        /* Current character matched a separator character.  Trim off
         * whitespace previous to the separator.  If we get here, there
         * is at least one savable character */
        tok_end = mSplitTrimEnd(str, tok_start, i);

        /* Allocate a buffer.  The length will not have included the
         * meta char of escaped separators */
        toks[cur_tok] = mSplitAddTok(&str[tok_start], tok_end - tok_start, sep_chars, meta_char);
        cur_tok++;

        /* Move past any more separator characters or whitespace */
        i = mSplitSkipSeps(str, i, str_len, sep_chars, sep_len);

        /* Nothing but separator characters or whitespace left in the string */
        if (i == str_len)
        {
            *num_toks = (int)cur_tok;

            if (toks != toks_alloc)
            {
                retstr = (char **)SnortAlloc(sizeof(char *) * cur_tok);
                memcpy(retstr, toks, (sizeof(char *) * cur_tok));
            }
            else
            {
                retstr = toks;
            }

            return retstr;
        }

        /* Reached the size of our current string buffer and need to
         * allocate something bigger.  Only get here once if a token limit
         * is set because we'll just allocate that many slots. */
        if (cur_tok == toks_buf_size)
        {
            char **old_toks = (toks_alloc != NULL) ? toks_alloc : toks_buf;

            if (tok_limit != 0)
                toks_buf_size = tok_limit;
            else
                toks_buf_size = cur_tok + toks_buf_size_increment;

            toks_alloc = (char **)SnortAlloc(sizeof(char *) * toks_buf_size);
            memcpy(toks_alloc, old_toks, (sizeof(char *) * cur_tok));
            toks = toks_alloc;

            if (old_toks != toks_buf)
                free(old_toks);
        }

        if ((tok_limit != 0) && ((cur_tok + 1) == tok_limit))
        {
            /* Return rest of string as last tok */
            if (toks != toks_alloc)
            {
                retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
                memcpy(retstr, toks, (sizeof(char *) * cur_tok));
            }
            else
            {
                retstr = toks;
            }

            /* Trim whitespace at end of last tok.  str[i] is not
             * whitespace, so the result is always past i. */
            tok_end = mSplitTrimEnd(str, i, str_len);

            retstr[cur_tok] = SnortStrndup(&str[i], tok_end - i);
            if (retstr[cur_tok] == NULL)
            {
                mSplitFree(&retstr, cur_tok + 1);
                return NULL;
            }

            /* Only report a count once the result is actually returned */
            *num_toks = (int)(cur_tok + 1);
            return retstr;
        }

        /* Start of the next tok.  i is deliberately not advanced here so
         * the first character of the tok goes through the meta char check
         * above, exactly like the first character of the first tok. */
        tok_start = i;
    }

    /* Last character was an escape character */
    if (escaped)
    {
        for (k = 0; k < cur_tok; k++)
            free(toks[k]);

        if (toks == toks_alloc)
            free(toks_alloc);

        return NULL;
    }

    /* Trim whitespace at end of last tok */
    tok_end = mSplitTrimEnd(str, tok_start, str_len);

    /* Last character was not a separator character so we've got
     * one more tok.  Unescape escaped separator characters */
    if (toks != toks_alloc)
    {
        retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
        memcpy(retstr, toks, (sizeof(char *) * cur_tok));
    }
    else
    {
        retstr = toks;
    }

    retstr[cur_tok] = mSplitAddTok(&str[tok_start], tok_end - tok_start, sep_chars, meta_char);

    /* Just add one to cur_tok index instead of incrementing
     * since we're done */
    *num_toks = (int)(cur_tok + 1);
    return retstr;
}
/*
 * mSplitAddTok() - build one returned token from a slice of the input.
 *
 * Copies the first len characters of str into a newly allocated,
 * NUL-terminated string while undoing escapes:
 *   - meta_char followed by a separator yields just the separator;
 *   - meta_char followed by any other character is kept as both
 *     characters;
 *   - a meta_char that is the very last character of the slice is dropped.
 *
 * str        start of the token inside the original string
 * len        number of characters of str that belong to the token
 * sep_chars  NUL-terminated set of separator characters (must not be NULL)
 * meta_char  the escape metacharacter
 *
 * Returns the new token, owned by the caller.  Per the original note this
 * will not return NULL because SnortAlloc is fatal on failure (SnortAlloc
 * is defined elsewhere - confirm there).
 */
static char * mSplitAddTok(const char *str, const int len, const char *sep_chars, const char meta_char)
{
    const size_t sep_len = strlen(sep_chars);   /* hoisted out of the loops */
    size_t i, k;
    char *tok;
    size_t tok_len = 0;
    int got_meta = 0;

    /* Pass 1: get the length of the returned tok.
     * Could have a maximum token length and use a fixed sized array and
     * fill it in as we go but don't want to put on that constraint */
    for (i = 0; (int)i < len; i++)
    {
        if (!got_meta)
        {
            if (str[i] == meta_char)
            {
                got_meta = 1;
                continue;
            }
        }
        else
        {
            /* The escaped character is not a separator, so the meta
             * character itself is part of the return tok */
            if (memchr(sep_chars, str[i], sep_len) == NULL)
                tok_len++;

            got_meta = 0;
        }

        tok_len++;
    }

    /* Allocate it and fill it in */
    tok = (char *)SnortAlloc(tok_len + 1);

    /* Pass 2 must start from the same state as pass 1.  Without this reset
     * a slice ending in a meta char leaves got_meta set, pass 2 then emits
     * one character more than pass 1 counted and the terminator is lost. */
    got_meta = 0;

    for (i = 0, k = 0; (int)i < len; i++)
    {
        if (!got_meta)
        {
            if (str[i] == meta_char)
            {
                got_meta = 1;
                continue;
            }
        }
        else
        {
            /* Same rule as in pass 1: keep the meta character unless it
             * was escaping a separator */
            if (memchr(sep_chars, str[i], sep_len) == NULL)
                tok[k++] = meta_char;

            got_meta = 0;
        }

        tok[k++] = str[i];
    }

    /* Terminate explicitly instead of relying on the allocator zero-filling */
    tok[k] = '\0';

    return tok;
}
总结
mSplit 用于按照一个或多个分隔符,将原字符串切割成若干子串,且不会修改原字符串。
调用该接口后一定要检查返回值(可能为 NULL),并结合 num_toks 确认实际得到的子串个数。
返回的指针数组以及其中每个子串都是动态分配的,使用完毕后一定要释放。
在 C++ 中,可以用 std::string 的 find 接口查找分隔符并提取子串,达到类似的效果。