/***
*strcmp - compare two strings, returning less than, equal to, or greater than
*
*Purpose:
* STRCMP compares two strings and returns an integer
* to indicate whether the first is less than the second, the two are
* equal, or whether the first is greater than the second.
*
* Comparison is done byte by byte on an UNSIGNED basis, which is to
* say that Null (0) is less than any other character (1-255).
*
*Entry:
* const char * src - string for left-hand side of comparison
* const char * dst - string for right-hand side of comparison
*
*Exit:
* returns -1 if src < dst
* returns 0 if src == dst
* returns +1 if src > dst
*
*Exceptions:
*
*******************************************************************************/
int __cdecl strcmp (
const char * src,
const char * dst
)
{
int ret = 0 ;
while( ! (ret = *(unsigned char *)src - *(unsigned char *)dst) && *dst)
++src, ++dst;
if ( ret < 0 )
ret = -1 ;
else if ( ret > 0 )
ret = 1 ;
return( ret );
}
基于 unsigned的比较,‘\0’的ASCII是0,所以即使到结尾时也可以取值
/***
*char *strchr(string, c) - search a string for a character
*
*Purpose:
* Searches a string for a given character, which may be the
* null character '\0'.
*
*Entry:
* char *string - string to search in
* char c - character to search for
*
*Exit:
* returns pointer to the first occurence of c in string
* returns NULL if c does not occur in string
*
*Exceptions:
*
*******************************************************************************/
char * __cdecl strchr (
const char * string,
int ch
)
{
while (*string && *string != (char)ch)
string++;
if (*string == (char)ch)
return((char *)string);
return(NULL);
}
char * __cdecl strrchr (
const char * string,
int ch
)
{
char *start = (char *)string;
while (*string++) /* find end of string */终止时string指向结束字符的下一个
;
/* search towards front */
while (--string != start && *string != (char)ch)
;
if (*string == (char)ch) /* char found ? */
return( (char *)string );
return(NULL);
}
查找null字符('\0' ascii为0)结果不是返回NULL。
size_t __cdecl strlen (
const char * str
)
{
const char *eos = str;
while( *eos++ ) ;
return( eos - str - 1 );
}
注意,这么写后面一定要减1,因为while内即使循环不成立时,eos也自加了一次,最好不在while判断语句里加自加语句,放到while体里。可以用int临时变量计数,然后返回。下面的例子一样。
char * __cdecl strncat (
char * front,
const char * back,
size_t count
)
{
char *start = front;
while (*front++)
;
front--;//
while (count--)
if (!(*front++ = *back++))//若复制了结束字符直接返回就行了
return(start);
*front = '\0';
return(start);
}
/***
*int strncmp(first, last, count) - compare first count chars of strings
*
*Purpose:
* Compares two strings for lexical order. The comparison stops
* after: (1) a difference between the strings is found, (2) the end
* of the strings is reached, or (3) count characters have been
* compared.
*
*Entry:
* char *first, *last - strings to compare
* unsigned count - maximum number of characters to compare
*
*Exit:
* returns <0 if first < last
* returns 0 if first == last
* returns >0 if first > last
*
*Exceptions:
*
*******************************************************************************/
int __cdecl strncmp
(
const char *first,
const char *last,
size_t count
)
{
size_t x = 0;
if (!count)
{
return 0;
}
/*
* This explicit guard needed to deal correctly with boundary
* cases: strings shorter than 4 bytes and strings longer than
* UINT_MAX-4 bytes .
*/
if( count >= 4 )
{
/* unroll by four */
for (; x < count-4; x+=4)
{
first+=4;
last +=4;
if (*(first-4) == 0 || *(first-4) != *(last-4))
{
return(*(unsigned char *)(first-4) - *(unsigned char *)(last-4));
}
if (*(first-3) == 0 || *(first-3) != *(last-3))
{
return(*(unsigned char *)(first-3) - *(unsigned char *)(last-3));
}
if (*(first-2) == 0 || *(first-2) != *(last-2))
{
return(*(unsigned char *)(first-2) - *(unsigned char *)(last-2));
}
if (*(first-1) == 0 || *(first-1) != *(last-1))
{
return(*(unsigned char *)(first-1) - *(unsigned char *)(last-1));
}
}
}
/* residual loop */
for (; x < count; x++)
{
if (*first == 0 || *first != *last)
{
return(*(unsigned char *)first - *(unsigned char *)last);
}
first+=1;
last+=1;
}
return 0;
}
这么做是为了更快吗?
/***
*char *strncpy(dest, source, count) - copy at most n characters
*
*Purpose:
* Copies count characters from the source string to the
* destination. If count is less than the length of source,
* NO NULL CHARACTER is put onto the end of the copied string.
* If count is greater than the length of sources, dest is padded
* with null characters to length count.
*
*
*Entry:
* char *dest - pointer to destination
* char *source - source string for copy
* unsigned count - max number of characters to copy
*
*Exit:
* returns dest
*
*Exceptions:
*
*******************************************************************************/
char * __cdecl strncpy (
char * dest,
const char * source,
size_t count
)
{
char *start = dest;
while (count && (*dest++ = *source++)) /* copy string */结束字符也拷贝了,而且拷贝之后指针指向结束字符的下一个位置。
count--;
if (count) /* pad out with zeroes */
while (--count)
*dest++ = '\0';
return(start);
}
一定会拷贝到destcount个字符,但不一定会拷贝结束字符,可能拷贝之后改变dest原来的结束字符,使其没有结束符,或者溢出。内存拷贝函数不用开辟空间也不会付出。
char * __cdecl _strnset (
char * string,
int val,
size_t count
)
{
char *start = string;
while (count-- && *string)
*string++ = (char)val;
return(start);
}
char * __cdecl _strrev (
char * string
)
{
char *start = string;
char *left = string;
char ch;
while (*string++) /* find end of string */
;
string -= 2;
while (left < string)
{
ch = *left;
*left++ = *string;
*string-- = ch;
}
return(start);
}
/***
*strstr.c - search for one string inside another
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*Purpose:
* defines strstr() - search for one string inside another
*
*******************************************************************************/
#include <cruntime.h>
#include <string.h>
/***
*char *strstr(string1, string2) - search for string2 in string1
*
*Purpose:
* finds the first occurrence of string2 in string1
*
*Entry:
* char *string1 - string to search in
* char *string2 - string to search for
*
*Exit:
* returns a pointer to the first occurrence of string2 in
* string1, or NULL if string2 does not occur in string1
*
*Uses:
*
*Exceptions:
*
*******************************************************************************/
char * __cdecl strstr (
const char * str1,
const char * str2
)
{
char *cp = (char *) str1;
char *s1, *s2;
if ( !*str2 )
return((char *)str1);
while (*cp)
{
s1 = cp;
s2 = (char *) str2;
while ( *s1 && *s2 && !(*s1-*s2) )
s1++, s2++;
if (!*s2)//s2到头时才查找成功。
return(cp);
cp++;
}
return(NULL);
}
上面是VC++的C运行时库函数原型
/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
size_t strspn2 (const char *s,const char *accept)//在accept里没有,而在s里有的第一个字符在s里的位置序号(遍历s,返回s中的位置)
//若返回的不是位置而是指针,该函数就是strspnp
{
const char *p;
const char *a;
size_t count = 0;
//int count = 0;
for (p = s; *p != '\0'; ++p)
{
for (a = accept; *a != '\0'; ++a)
//{
if (*p == *a)
break;
//}
if (*a == '\0')
return count;
else
++count;
}
return count;
}
char * strpbrk2 (const char *s,const char *accept)//遍历s中的字符,返回第一个在accept中出现的s中的字符的指针,即在accept里有的,在s中最先出现的字符的指针。(遍历s,返回s中的指针)
//若返回的不是指针而是位置,则该函数就是strcspn
{
while (*s != '\0')
{
const char *a = accept;
while (*a != '\0')
if (*a++ == *s)
return (char *) s;
++s;
}
return NULL;
}
这是linux下的C运行时库函数原型
/* Copyright (C) 1991, 1996, 1997, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <string.h>
static char *olds = NULL;
/* Parse S into tokens separated by characters in DELIM.
If S is NULL, the last string strtok() was called with is
used. For example:
char s[] = "-abc-=-def";
x = strtok(s, "-"); // x = "abc"
x = strtok(NULL, "-="); // x = "def"
x = strtok(NULL, "="); // x = NULL
// s = "abc\0-def\0"
*/
char * strtok1 (char *s, const char *delim)
{
char *token;
if (s == NULL)
s = olds;
if (s==NULL)//?这是我自己改的,原来没有
{
return NULL;//?
}//?
// Scan leading delimiters.
s += strspn (s, delim);//仅当delim里的所有元素包含s里的全部元素时*s才会为‘\0’s中必须要有delim中没有的元素,这样才能分割,否则的话没有意义
if (*s == '\0')
return NULL;
// Find the end of the token.
token = s;//token用作返回值,返回找到的第一个分隔符前面的分割串,即token标志着返回的分割串的开始位置,而s表示结尾位置。
s = strpbrk (token, delim);//delim里的所有元素都是分隔符,而且必须不是包括了token中的所有字符,否则没有意义
if (s == NULL)//没找到分隔符,即就剩一个分割串了,结束位置默认就是字符串的结束位置。
// This token finishes the string.
olds = NULL;//(token, '\0');
else//找到第一个分隔符位置,令第一个分隔符为‘\0’,olds指向下一次开始查找的位置。
{
// Terminate the token and make OLDS point past it.
*s = '\0';
olds = s + 1;
}
return token;
}
char sentence[]="This is a sentence with 7 tokens";
cout<<"The string to be tokenized is:\n"<<sentence<<"\n\nThe tokens are:\n\n";
//char *tokenPtr=strtok2(sentence,"s");
char *tokenPtr=strtok(sentence,"snkenot7wihcaT ");//如果分隔符包含所有的token里的字符,像这样"snkenot7wihcaT "那么第一次就返回空,
while(tokenPtr!=NULL)
{
cout<<endl<<"token: "<<tokenPtr;
//tokenPtr=strtok2(NULL," ");
tokenPtr=strtok(NULL,"sell");
cout<<" sentence: "<<sentence<<endl;
}
这是测试strtok的例子,第一次传递字符指针,之后传递NULL。
static char *_TOKEN;
char * __cdecl strtok4 (
char * string,
const char * control
)
{
unsigned char *str;
const unsigned char *ctrl = (unsigned char *)control;
unsigned char map[32];
int count;
/* Clear control map */
for (count = 0; count < 32; count++)
map[count] = 0;
/* Set bits in delimiter table */
do {
map[*ctrl >> 3] |= (1 << (*ctrl & 7));
} while (*ctrl++);
/* Initialize str */
/* If string is NULL, set str to the saved
* pointer (i.e., continue breaking tokens out of the string
* from the last strtok call) */
if (string)
str = (unsigned char *)string;
else
str = (unsigned char *)_TOKEN;
/* Find beginning of token (skip over leading delimiters). Note that
* there is no token iff this loop sets str to point to the terminal
* null (*str == '\0') */
while ( (map[*str >> 3] & (1 << (*str & 7))) && *str )//分隔符已经放到了map里,strcpn 在str里找不是分隔符的字符,作为起始位置。
str++;
//1 这里没有想linux实现 做了判断确保str不为‘\0’。这里既是str为‘、0’,下面的执行会返回NULL
string = (char*)str;//string表示开始的位置
/* Find the end of the token. If it is not the end of the string,
* put a null there. */
for ( ; *str ; str++ )
if ( map[*str >> 3] & (1 << (*str & 7)) ) {//找str中存在的分隔符,找到的位置作为结束位置。一次查找完毕,跳出循环。
*str++ = '\0';
break;
}
//当str没找到时,*str为’\0‘,即为最后的一个分割所得的字符串
/* Update nextoken (or the corresponding field in the per-thread data
* structure */
_TOKEN = (char*)str;//下一次调用 开始的位置
/* Determine if a token has been found. */
if ( (unsigned char *)string == str )//这里只要上面的for循环执行了一次,就说明找到了一个分割的字符串。只有经过while语句后,*str为'\0'时(string也是),for循环才不执行,
return NULL;
else
return string;
}
这是VC++ C运行时库函数原型实现。
//1 size_t __cdecl strspn4 (
//2 size_t __cdecl strcspn4 (
char * __cdecl strpbrk4 (
const char * string,const char * control)//
{
const unsigned char *str = (const unsigned char *)string;
const unsigned char *ctrl = (const unsigned char *)control;
unsigned char map[32];
int count;
/* Clear out bit map */
for (count=0; count<32; count++)
map[count] = 0;
/* Set bits in control map */
while (*ctrl)
{
map[*ctrl >> 3] |= (1 << (*ctrl & 7));//先由ctrl来设定map。把每一个字节分两部分,第一部分5位,作为map的索引,第二部分3位作为map的值。也就相当把ctrl放到map这个集合里了
ctrl++;
}
//#if ROUTINE == _STRSPN
/*1 if (*str)
{
count=0;
while (map[*str >> 3] & (1 << (*str & 7)))//设定完map,只要用str按同样的设定方式遍历map,看是否相等,若相等(即在集合中)则继续遍历,直到找到第一个不相等的输出位置即可。
{ //结束时str为‘\0’,*str为0,&右侧为1,左侧map[0]为0,不等,退出;
count++;
str++;
}
return(count);
}
return(0);*/
//#elif ROUTINE == _STRCSPN
/*2
count=0;
map[0] |= 1; //为什么要加!,因为这次要找第一次相等的,当相等时,()内结果为1,取非后为0,循环退出,成功返回。
while (!(map[*str >> 3] & (1 << (*str & 7))))//循环的终止条件为&操作结果为1,所以两侧都要为1,才能终止。右侧肯定会为1,左侧初始化时为0了,所以要改变
{//若map[0] 不置为1,结束时&左侧 map[0] 为0,右侧为1,&之后为0,!后为1,这样循环无法停止了。
count++;
str++;
}
return(count);
*/
/*2
count=0;// strspn
while (*str)//
{
if (map[*str >> 3] & (1 << (*str & 7)))
return(count);//一旦找到就返回
str++;//没找到 则继续找
count++;
}
return(count);
*/
// ROUTINE == _STRCSPN
//
while (*str)//这是实现strbrk
{
if (map[*str >> 3] & (1 << (*str & 7)))
return((char *)str);//一旦找到就返回
str++;//没找到 则继续找
}
return(NULL);
//
}
上面是VC++ C运行时库函数原型的实现。map就相当于一个集合,所有涉及集合的表示都可以用这种方法,牺牲空间换取时间。为什么用unsigned char* 呢?移位的时候前面统统补0,是这个原因吗?