在VC++ 8.0的函数库中,strtok()函数的实现采用了位图(bit map)。随后继续查阅strspn()、strcspn()和strpbrk()这三个函数的源码,发现它们也采用bit map来减少存储空间,并且共用了部分代码,因此写篇博客简单记录一下。
首先来看这三个函数的原型:
// Return the length of the prefix of string consisting of characters in control
size_t __cdecl strspn (const char * string, const char * control);
// Return the length of the prefix of string consisting of characters not in control
size_t __cdecl strcspn (const char * string, const char * control);
// Return a pointer to the first occurrence in string of any character of control,
// or NULL if not present
char * __cdecl strpbrk (const char * string, const char * control);
XX\Microsoft Visual Studio 8\VC\crt\src这个文件夹下有这样三个文件:strcspn.c、strpbrk.c和strspn.c。
strcspn.c:
#define SSTRCSPN // selector macro: makes strspn.c compile its strcspn() variant
#include "strspn.c"
strpbrk.c:
#define SSTRPBRK // selector macro: makes strspn.c compile its strpbrk() variant
#include "strspn.c"
关键的实现在strspn.c中:
/* Determine which routine we're compiling for (default to STRSPN) */
#define _STRSPN 1
#define _STRCSPN 2
#define _STRPBRK 3
#if defined (SSTRCSPN)
#define ROUTINE _STRCSPN
#elif defined (SSTRPBRK)
#define ROUTINE _STRPBRK
#else
#define ROUTINE _STRSPN //默认
#endif
/* Routine prototype */
/*
 * One shared body that is compiled as strspn(), strcspn() or strpbrk(),
 * depending on the value of ROUTINE.
 *
 * string  - NUL-terminated string to scan
 * control - NUL-terminated set of characters to test against
 *
 * strspn():  returns the length of the leading run of characters that are in control.
 * strcspn(): returns the length of the leading run of characters that are NOT in control.
 * strpbrk(): returns a pointer to the first character of string that is in control,
 *            or NULL if there is none.
 */
#if ROUTINE == _STRSPN
size_t __cdecl strspn (
#elif ROUTINE == _STRCSPN
size_t __cdecl strcspn (
#else /* ROUTINE == _STRPBRK */
char * __cdecl strpbrk (
#endif /* ROUTINE == _STRPBRK */
const char * string,
const char * control
)
{
/* Work on unsigned bytes so the values can be used directly as bit-map indices. */
const unsigned char *str = string;
const unsigned char *ctrl = control;
/* 32 bytes * 8 bits = 256 bits: one membership bit per possible byte value. */
unsigned char map[32];
/* NOTE(review): count is an int but is returned from functions declared size_t. */
int count;
/* Clear out bit map */
for (count=0; count<32; count++)
map[count] = 0;
/* Set bits in control map */
while (*ctrl) // same bit-map technique as used in strtok()
{
/* Byte index is the value / 8 (>> 3); bit within that byte is the value % 8 (& 7). */
map[*ctrl >> 3] |= (1 << (*ctrl & 7));
ctrl++;
}
#if ROUTINE == _STRSPN// strspn() variant
/* 1st char NOT in control map stops search */
if (*str)
{
count=0;
/* The loop above never sets the bit for '\0', so the terminator ends this scan. */
while (map[*str >> 3] & (1 << (*str & 7)))
{
count++;
str++;
}
return(count);
}
return(0);
#elif ROUTINE == _STRCSPN// strcspn() variant
/* 1st char in control map stops search */
count=0;
/* Mark '\0' as a member so the scan below also stops at the end of string. */
map[0] |= 1; /* null chars not considered */
while (!(map[*str >> 3] & (1 << (*str & 7))))
{
count++;
str++;
}
return(count);
#else /* ROUTINE == _STRPBRK */ // strpbrk() variant
/* 1st char in control map stops search */
while (*str)
{
if (map[*str >> 3] & (1 << (*str & 7)))
return((char *)str);
str++;
}
/* Reached the terminator without finding any character from control. */
return(NULL);
#endif /* ROUTINE == _STRPBRK */
} 理解了strtok()的源码,这三个函数的理解就太容易了,主要是位图的使用,C++中提供了专门的标准类型bitset,巧妙使用,在很多算法中能节省空间,提高效率。