[西门子面试题]strtok的理解以及实现

MSDN Link
[url]https://msdn.microsoft.com/en-us/library/2c8d19sb(VS.71).aspx[/url]

strtok


/*
 * Prototype of strtok as quoted from the MSDN page linked above.
 *
 * strToken   - string to split; the sample program below passes the string
 *              on the first call and NULL on every following call.
 * strDelimit - the set of delimiter characters.
 *
 * The sample program below keeps calling it until it returns NULL, i.e.
 * until no further token is found.
 */
char *strtok(
char *strToken,
const char *strDelimit
);



// crt_strtok.c
/* In this program, a loop uses strtok
* to print all the tokens (separated by commas,
* blanks, tabs or newlines) in the string named "teststring".
*/

#include <string.h>
#include <stdio.h>

/* Input text; strtok overwrites delimiters in place, so it must be writable. */
char teststring[] = "A string\tof ,,tokens\nand some more tokens";
/* Delimiter set: blank, comma, tab and newline. */
char seps[] = " ,\t\n";
/* Most recently returned token (NULL once the input is exhausted). */
char *token;

/*
 * Prints every token of teststring on its own line.
 * Declared as int main(void) and returns 0: a hosted C program's entry
 * point must return int, `void main` is not portable.
 */
int main( void )
{
    printf( "Tokens:\n" );
    /* Establish string and get the first token: */
    token = strtok( teststring, seps );
    while( token != NULL )
    {
        /* While there are tokens in "teststring" */
        printf( " %s\n", token );
        /* Get next token: */
        token = strtok( NULL, seps );
    }
    return 0;
}


Tokens:
A
string
of
tokens
and
some
more
tokens



/*
 * strtok_A - split a string into tokens, one token per call.
 *
 * s     - string to tokenize on the first call; NULL to continue with the
 *         position remembered from the previous call.
 * delim - NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when no token is left.
 * The delimiter that ends a token is overwritten with '\0'. The resume
 * position is kept in a function-local static variable.
 */
char* strtok_A(char *s, const char *delim)
{
    static char *last;      /* where the next call resumes; NULL when done */
    const char *d;
    char *start;
    int skipping;

    if (s == NULL) {
        s = last;
        if (s == NULL) {
            return NULL;
        }
    }

    /* Step over every leading character that belongs to delim. */
    do {
        skipping = 0;
        for (d = delim; *d != '\0'; d++) {
            if (*s == *d) {
                skipping = 1;
                s++;
                break;
            }
        }
    } while (skipping);

    /* Only delimiters (or nothing) were left: there is no token. */
    if (*s == '\0') {
        last = NULL;
        return NULL;
    }

    start = s;

    /*
     * Walk forward from the second character of the token. Each character
     * is compared with every delimiter AND with delim's terminating NUL,
     * so the end of the input string ends the token as well.
     */
    for (s = start + 1; ; s++) {
        for (d = delim; ; d++) {
            if (*d == *s) {
                if (*s == '\0') {
                    /* reached the end of the input: nothing to resume */
                    last = NULL;
                } else {
                    /* terminate the token and resume after the delimiter */
                    *s = '\0';
                    last = s + 1;
                }
                return start;
            }
            if (*d == '\0') {
                break;
            }
        }
    }
    /* NOTREACHED */
}



/* Returns non-zero when ch is one of the characters in delims. */
static int strtok_func_is_delim(char ch, const char *delims)
{
    for (; *delims != '\0'; delims++) {
        if (ch == *delims) {
            return 1;
        }
    }
    return 0;
}

/*
 * strtok_func - hand-written strtok replacement.
 *
 * strToken   - string to tokenize on the first call; NULL to continue with
 *              the string remembered from the previous call.
 * strDelimit - NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when none is left. The
 * delimiter that ends a token is overwritten with '\0'. The resume
 * position is kept in a function-local static variable.
 *
 * Fixes over the previous version:
 *  - the saved pointer is tested for NULL before it is dereferenced, so a
 *    first call with strToken == NULL returns NULL instead of crashing;
 *  - input made only of delimiters (for example ",," or the tail of
 *    "a,b,,") yields NULL instead of an empty-string token;
 *  - the delimiter set is only read through const pointers.
 */
char* strtok_func(char *strToken, const char *strDelimit)
{
    static char *pToken = NULL;
    char *pchRetToken;

    if (strToken != NULL)
    {
        pToken = strToken;
    }

    if (pToken == NULL)
    {
        return NULL;
    }

    /* Skip leading delimiters. */
    while (*pToken != '\0' && strtok_func_is_delim(*pToken, strDelimit))
    {
        pToken++;
    }

    if (*pToken == '\0')
    {
        /* Nothing but delimiters was left: drop the saved position. */
        pToken = NULL;
        return NULL;
    }

    /* Scan to the end of the token. */
    pchRetToken = pToken;
    while (*pToken != '\0' && !strtok_func_is_delim(*pToken, strDelimit))
    {
        pToken++;
    }

    if (*pToken != '\0')
    {
        /* Terminate the token and resume just after the delimiter. */
        *pToken = '\0';
        pToken++;
    }

    return pchRetToken;
}


下文转自
[url]http://blog.csdn.net/morewindows/article/details/8740315[/url]

strtok源码剖析
strtok函数可以用于分隔字符串,最近看了下这个函数的源代码,其中有


unsigned char map[32];
/* Clear control map */
for (count = 0; count < 32; count++)
map[count] = 0;

/* Set bits in delimiter table */
do {
map[*ctrl >> 3] |= (1 << (*ctrl & 7));
} while (*ctrl++);


这段代码非常有意思,第一眼可能不明白,为什么用个unsigned char map[32];数组来保存分隔字符。下面的map[*ctrl >> 3] |= (1 << (*ctrl & 7));更加有点古怪。在网上查了下,并没有文章来解释,因此写篇博客来解释说明下。

这个长为32的数组与后面的左移、右移操作看起来令人迷惑,其实如果看过《位操作基础篇之位操作全面总结》(http://blog.csdn.net/morewindows/article/details/7354571)的“位操作与空间压缩”便不难想到,这里其实是个位操作的空间压缩技巧。因为char类型的数据(按unsigned char解释)只会从0到255,共256种取值,所以可以建立一个哈希表来记录哪些字符是分隔符:是则标记为1,否则标记为0;每个标记只需1位,256位正好是32个字节,这就是map[32]的由来。之后在分隔字符串时,就能直接判断字符串的该位置是否要分割。详细请见MyStrtok的实现。


//strtok源码剖析 位操作与空间压缩
//http://blog.csdn.net/morewindows/article/details/8740315
//By MoreWindows( http://blog.csdn.net/MoreWindows )
#include <stdio.h>
// strtok源码剖析
/*
 * __cdecl is a Microsoft calling-convention keyword. Where the toolchain
 * does not provide it, define it away so the signature below still
 * compiles unchanged.
 */
#if !defined(_MSC_VER) && !defined(__cdecl)
#define __cdecl
#endif

/* Returns non-zero when the bit for byte value ch is set in the 256-bit map. */
static int MyStrtok_is_delim(const unsigned char *map, unsigned char ch)
{
    return (map[ch / 8] & (1u << (ch % 8))) != 0;
}

/*
 * MyStrtok - annotated strtok implementation using a bitmap of delimiters.
 *
 * string  - string to tokenize on the first call; NULL to continue with the
 *           position saved by the previous call.
 * control - NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when no token is left
 * (including the case where string is NULL and nothing was saved yet).
 * The delimiter that ends a token is overwritten with '\0'.
 *
 * An unsigned char has 256 possible values, so one bit per value is enough
 * to record "is a delimiter": 256 bits = 32 bytes. Byte index is ch / 8
 * and bit index is ch % 8 (the article's original form is ch >> 3, ch & 7).
 */
char* __cdecl MyStrtok(char * string, const char * control)
{
    /*
     * Resume position for the next call. This version keeps it in a static
     * variable; the article notes that the real strtok uses per-thread
     * storage instead for thread safety.
     */
    static unsigned char *saved_token = NULL;
    const unsigned char *ctrl = (const unsigned char *)control;
    unsigned char map[32] = {0};
    unsigned char *str;

    /* Set one bit per delimiter character. */
    for (; *ctrl != '\0'; ctrl++)
        map[*ctrl / 8] |= (unsigned char)(1u << (*ctrl % 8));

    /* Start from the caller's string, or from the saved position. */
    str = (string != NULL) ? (unsigned char *)string : saved_token;
    if (str == NULL)
        return NULL;    /* nothing to continue from */

    /*
     * Find the beginning of the token (skip leading delimiters). There is
     * no token iff this loop stops on the terminating NUL.
     */
    while (*str != '\0' && MyStrtok_is_delim(map, *str))
        str++;

    string = (char *)str;

    /* Find the end of the token; if it is not the end of the string,
     * put a NUL there and step past it. */
    for ( ; *str != '\0'; str++)
    {
        if (MyStrtok_is_delim(map, *str))
        {
            *str++ = '\0';
            break;
        }
    }

    /* Remember where the next call resumes. */
    saved_token = str;

    /* No progress means no token was found. */
    if (string == (char *)str)
        return NULL;
    else
        return string;
}
/*
 * Demo driver: prints a banner and the sample string, then prints each
 * token MyStrtok extracts from it on its own line.
 */
int main()
{
    /* Sample text and the delimiter set used to split it. */
    char szText[] = "ab,c...d(e)f(g)hj";
    char szFind[] = ",.()";
    char *pToken;

    printf(" strtok源码剖析 位操作与空间压缩\n");
    printf(" - http://blog.csdn.net/morewindows/article/details/8740315 -\n");
    printf(" - By MoreWindows( http://blog.csdn.net/MoreWindows ) - \n\n");

    /* Show the input before tokenizing, because MyStrtok modifies it. */
    printf("原字符串为: %s\n", szText);
    printf("分隔后为: \n");

    /* First call passes the string, later calls pass NULL to continue. */
    for (pToken = MyStrtok(szText, szFind);
         pToken != NULL;
         pToken = MyStrtok(NULL, szFind))
    {
        printf("%s\n", pToken);
    }

    return 0;
}


运行结果如图所示(图片不能打开,请访问http://blog.csdn.net/morewindows/article/details/8740315)

[img]https://img-my.csdn.net/uploads/201303/30/1364632382_4041.png[/img]
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值