C语言字符串处理函数原型实现

最新推荐文章于 2022-07-19 13:39:34 发布

littlestream9527

最新推荐文章于 2022-07-19 13:39:34 发布

阅读量1k

点赞数

分类专栏： C/C++

本文链接：https://blog.csdn.net/littlestream9527/article/details/8887840

版权

C/C++ 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

/***
*strcmp - three-way comparison of two NUL-terminated strings
*
*Purpose:
*       Walks both strings in lock step and reports how the first one
*       orders relative to the second.
*
*       Bytes are compared as unsigned char, so the terminating NUL (0)
*       sorts before every other byte value (1-255).
*
*Entry:
*       const char * src - string for left-hand side of comparison
*       const char * dst - string for right-hand side of comparison
*
*Exit:
*       returns -1 if src <  dst
*       returns  0 if src == dst
*       returns +1 if src >  dst
*
*Exceptions:
*
*******************************************************************************/

int __cdecl strcmp (
        const char * src,
        const char * dst
        )
{
        const unsigned char *lhs = (const unsigned char *)src;
        const unsigned char *rhs = (const unsigned char *)dst;
        int diff;

        for (;;) {
                diff = *lhs - *rhs;

                /* stop at the first mismatch, or once both strings ended */
                if (diff != 0 || *rhs == '\0')
                        break;

                ++lhs;
                ++rhs;
        }

        /* collapse the byte difference to -1 / 0 / +1 */
        return( (diff > 0) - (diff < 0) );
}

比较按 unsigned char 进行；'\0' 的 ASCII 值为 0，小于其他任何字符，所以即使其中一个字符串已经到达结尾，仍然可以取值相减，并得到正确的大小关系。

/***
*char *strchr(string, ch) - search a string for a character
*
*Purpose:
*       Scans string from the front for the first byte equal to ch
*       (converted to char).  ch may be the null character '\0', in
*       which case the terminator itself is the match.
*
*Entry:
*       const char *string - string to search in
*       int ch - character to search for
*
*Exit:
*       returns pointer to the first occurrence of ch in string
*       returns NULL if ch does not occur in string
*
*Exceptions:
*
*******************************************************************************/

char * __cdecl strchr (
        const char * string,
        int ch
        )
{
        const char target = (char)ch;
        const char *p;

        for (p = string; *p != target; ++p) {
                if (*p == '\0')
                        return(NULL);   /* reached the end without a match */
        }

        return((char *)p);
}

/***
*char *strrchr(string, ch) - find the last occurrence of ch in string
*
*Purpose:
*       Finds the last occurrence of ch (converted to char) in string.
*       The terminating null character takes part in the search, so
*       looking for '\0' yields a pointer to the terminator.
*
*Entry:
*       const char *string - string to search in
*       int ch - character to search for
*
*Exit:
*       returns a pointer to the last occurrence of ch in string
*       returns NULL if ch does not occur in string
*
*Exceptions:
*
*******************************************************************************/

char * __cdecl strrchr (
        const char * string,
        int ch
        )
{
        char *start = (char *)string;

        /* find end of string; when this loop stops, string points one
         * past the terminating NUL */
        while (*string++)
                ;
                                                /* search towards front */
        while (--string != start && *string != (char)ch)
                ;

        if (*string == (char)ch)                /* char found ? */
                return( (char *)string );

        return(NULL);
}

注意：用 strchr 或 strrchr 查找空字符（'\0'，ASCII 值为 0）时，返回的是指向字符串结束符的指针，而不是 NULL。

/*
 * strlen - return the number of bytes in str before the terminating NUL.
 */
size_t __cdecl strlen (
        const char * str
        )
{
        size_t len = 0;

        /* count bytes up to, but not including, the terminator */
        while (str[len] != '\0')
                len++;

        return( len );
}

注意：这样写最后必须减 1，因为即使 while 的条件不成立，eos 也已经自增了一次。更清晰的写法是不在 while 的判断表达式里自增，而是把自增放到循环体中；也可以用一个 size_t 类型的临时变量计数，然后返回它。下面的 strncat 也有同样的情况（所以循环后要执行 front--）。

/*
 * strncat - append at most count characters of back to the end of front.
 *
 * Copying stops early if the terminating NUL of back is copied.  Otherwise
 * a NUL is stored after the count copied characters, so front is always
 * terminated on return.  Returns front.
 */
char * __cdecl strncat (
        char * front,
        const char * back,
        size_t count
        )
{
        char *tail = front;
        size_t i;

        /* locate the terminating NUL of front */
        while (*tail != '\0')
                tail++;

        for (i = 0; i < count; i++) {
                char c = back[i];

                tail[i] = c;
                if (c == '\0')
                        return(front);  /* back ended; terminator already copied */
        }

        tail[count] = '\0';
        return(front);
}

/***
*int strncmp(first, last, count) - compare first count chars of strings
*
*Purpose:
*       Compares two strings for lexical order, byte by byte as
*       unsigned char.  The comparison stops after: (1) a difference
*       between the strings is found, (2) the end of the strings is
*       reached, or (3) count characters have been compared.
*
*Entry:
*       const char *first, *last - strings to compare
*       size_t count - maximum number of characters to compare
*
*Exit:
*       returns <0 if first < last
*       returns  0 if first == last
*       returns >0 if first > last
*       (a non-zero result is the difference of the first differing bytes)
*
*Exceptions:
*
*******************************************************************************/

int __cdecl strncmp
(
    const char *first,
    const char *last,
    size_t      count
)
{
    const unsigned char *lhs = (const unsigned char *)first;
    const unsigned char *rhs = (const unsigned char *)last;
    size_t done = 0;

    if (count == 0)
    {
        return 0;
    }

    /*
     * The explicit guard keeps count-4 from wrapping around for counts
     * smaller than 4; such short comparisons go straight to the
     * residual loop.
     */
    if (count >= 4)
    {
        /* unrolled by four: one loop test per group of four bytes */
        for (; done < count - 4; done += 4, lhs += 4, rhs += 4)
        {
            if (lhs[0] == 0 || lhs[0] != rhs[0])
            {
                return lhs[0] - rhs[0];
            }

            if (lhs[1] == 0 || lhs[1] != rhs[1])
            {
                return lhs[1] - rhs[1];
            }

            if (lhs[2] == 0 || lhs[2] != rhs[2])
            {
                return lhs[2] - rhs[2];
            }

            if (lhs[3] == 0 || lhs[3] != rhs[3])
            {
                return lhs[3] - rhs[3];
            }
        }
    }

    /* residual loop: whatever the unrolled loop left over */
    for (; done < count; done++, lhs++, rhs++)
    {
        if (*lhs == 0 || *lhs != *rhs)
        {
            return *lhs - *rhs;
        }
    }

    return 0;
}

这么做是为了更快吗？是的：这是循环展开，每次循环比较 4 个字符，可以减少循环计数和条件跳转的开销；剩余不足 4 个的字符由后面的 residual loop 处理。

/***
*char *strncpy(dest, source, count) - copy at most n characters
*
*Purpose:
*       Copies count characters from the source string to the
*       destination.  If count is less than or equal to the length of
*       source, NO NULL CHARACTER is put onto the end of the copied
*       string.  If count is greater than the length of source, dest is
*       padded with null characters to length count.
*
*       Exactly count bytes of dest are written in every case.
*
*Entry:
*       char *dest - pointer to destination
*       const char *source - source string for copy
*       size_t count - max number of characters to copy
*
*Exit:
*       returns dest
*
*Exceptions:
*
*******************************************************************************/

char * __cdecl strncpy (
        char * dest,
        const char * source,
        size_t count
        )
{
        char *start = dest;

        /* copy string: the terminating NUL is copied as well, and the
         * loop exits right after storing it WITHOUT decrementing count,
         * leaving dest one past that NUL */
        while (count && (*dest++ = *source++))
                count--;

        /* pad out with zeroes: count still includes the NUL stored above,
         * hence the pre-decrement before each extra byte */
        if (count)
                while (--count)
                        *dest++ = '\0';

        return(start);
}

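strncpy 一定会向 dest 写入 count 个字符，但不一定会写入结束字符：当 count 不大于 source 的长度时，dest 末尾没有 '\0'，还可能覆盖 dest 原来的结束字符，使其不再以 '\0' 结尾；如果 dest 的空间不足 count 个字节，还会发生溢出。这类拷贝函数既不分配内存也不释放内存，dest 的空间必须由调用者事先准备好。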

/*
 * _strnset - overwrite the leading characters of string with val.
 *
 * At most count characters are replaced, and replacement never runs past
 * the terminating NUL that string has on entry.  Returns string.
 */
char * __cdecl _strnset (
        char * string,
        int val,
        size_t count
        )
{
        const char fill = (char)val;
        size_t i;

        /* stop after count bytes or at the terminator, whichever is first */
        for (i = 0; i < count && string[i] != '\0'; i++)
                string[i] = fill;

        return(string);
}

/*
 * _strrev - reverse the characters of string in place.
 *
 * The terminating NUL stays where it is; only the characters before it
 * are swapped.  Returns string.
 */
char * __cdecl _strrev (
        char * string
        )
{
        char *left = string;
        char *right = string;
        char ch;

        /* An empty string is already reversed.  Returning here also avoids
         * stepping a pointer back to before the start of the buffer. */
        if (*string == '\0')
                return(string);

        while (*right)                    /* find end of string */
                right++;
        right--;                          /* last character before the NUL */

        /* swap the outermost pair and move both ends towards the middle */
        while (left < right)
        {
                ch = *left;
                *left++ = *right;
                *right-- = ch;
        }

        return(string);
}

/***
*strstr.c - search for one string inside another
*
*       Copyright (c) Microsoft Corporation. All rights reserved.
*
*Purpose:
*       defines strstr() - search for one string inside another
*
*******************************************************************************/

#include <cruntime.h>
#include <string.h>

/***
*char *strstr(str1, str2) - search for str2 in str1
*
*Purpose:
*       Finds the first occurrence of str2 in str1 by trying every
*       starting position of str1 in turn.
*
*Entry:
*       const char *str1 - string to search in
*       const char *str2 - string to search for
*
*Exit:
*       returns a pointer to the first occurrence of str2 in
*       str1, or NULL if str2 does not occur in str1.
*       An empty str2 matches at the start: str1 is returned.
*
*Uses:
*
*Exceptions:
*
*******************************************************************************/

char * __cdecl strstr (
        const char * str1,
        const char * str2
        )
{
        const char *start;

        if (*str2 == '\0')
            return((char *)str1);

        for (start = str1; *start != '\0'; ++start)
        {
                const char *hay = start;
                const char *pat = str2;

                /* advance while the pattern keeps matching */
                while (*pat != '\0' && *hay == *pat)
                {
                        ++hay;
                        ++pat;
                }

                /* a match only counts if the whole pattern was consumed */
                if (*pat == '\0')
                        return((char *)start);
        }

        return(NULL);
}

上面是VC++的C运行时库函数原型

/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* Return the length of the maximum initial segment
   of S which contains only characters in ACCEPT.

   Put differently: walk S and return the index of its first character
   that does not occur in ACCEPT (the length of S if there is none).
   A variant that returns a pointer to that character instead of its
   index would be strspnp.  */
size_t strspn2 (const char *s,const char *accept)
{
  size_t span;

  for (span = 0; s[span] != '\0'; ++span)
    {
      const char *a = accept;

      /* look for the current character of S in ACCEPT */
      while (*a != '\0' && *a != s[span])
	++a;

      /* ran off the end of ACCEPT: this character ends the segment */
      if (*a == '\0')
	break;
    }

  return span;
}

/* Walk S and return a pointer to its first character that also occurs
   in ACCEPT, or NULL if no character of S occurs in ACCEPT.
   A variant that returns the index of that character instead of a
   pointer would be strcspn.  */
char * strpbrk2 (const char *s,const char *accept)
{
	const char *p;
	const char *a;

	for (p = s; *p != '\0'; ++p)
		for (a = accept; *a != '\0'; ++a)
			if (*a == *p)
				return (char *) p;

	return NULL;
}

这是linux下的C运行时库函数原型

/* Copyright (C) 1991, 1996, 1997, 1999 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

#include <string.h>


/* Where the next strtok1 (NULL, ...) call resumes scanning; NULL when
   there is nothing left to continue from.  */
static char *olds = NULL;

/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, scanning continues in the string passed to the most
   recent call that supplied one.  Returns a pointer to the next token,
   or NULL when no token is left.  The string is modified: the delimiter
   that ends each token is overwritten with '\0'.  For example:
	char s[] = "-abc-=-def";
	x = strtok1(s, "-");		// x = "abc"
	x = strtok1(NULL, "-=");	// x = "def"
	x = strtok1(NULL, "=");		// x = NULL
		// s now holds "-abc\0=-def"
*/

char * strtok1 (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* Nothing to continue from.  (This guard was added by the article's
     author; the library original does not have it.)  */
  if (s == NULL)
    return NULL;

  /* Scan leading delimiters.  *s is '\0' afterwards only when every
     remaining character is a delimiter, i.e. there is no token.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      /* Forget the saved position so that a later strtok1 (NULL, ...)
	 cannot resume inside a previously tokenized string.  */
      olds = NULL;
      return NULL;
    }

  /* Find the end of the token.  TOKEN marks its start and is the return
     value; S moves on to the first delimiter after it.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* No delimiter left: this token runs to the end of the string.  */
    olds = NULL;
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}

// Demo fragment (written with C++ iostream's cout/endl): tokenizes `sentence`
// with strtok.  The first call passes the buffer itself; every later call
// passes NULL to continue in the same buffer.
char sentence[]="This is a sentence with 7 tokens";
	cout<<"The string to be tokenized is:\n"<<sentence<<"\n\nThe tokens are:\n\n";
	//char *tokenPtr=strtok2(sentence,"s");
	char *tokenPtr=strtok(sentence,"snkenot7wihcaT ");// when the delimiter set contains every character of the string, as "snkenot7wihcaT " does here, the very first call already returns NULL
	while(tokenPtr!=NULL)
	{
		cout<<endl<<"token: "<<tokenPtr;
		//tokenPtr=strtok2(NULL," ");
		tokenPtr=strtok(NULL,"sell");
		// print the buffer after each call to show how strtok has modified it
		cout<<"   sentence: "<<sentence<<endl;
	}

这是测试strtok的例子，第一次传递字符指针，之后传递NULL。

/* Position at which the next strtok4(NULL, ...) call resumes scanning. */
static char *_TOKEN;

/*
 * strtok4 - split string into tokens separated by characters in control.
 *
 * string  - buffer to tokenize, or NULL to continue in the buffer given
 *           to an earlier call
 * control - set of delimiter characters
 *
 * Returns a pointer to the next token, or NULL when none is left (also
 * when string is NULL and no buffer has been supplied yet).  The buffer
 * is modified: the delimiter that ends a token is overwritten with '\0'.
 */
char * __cdecl strtok4 (
        char * string,
        const char * control
        )
{
        unsigned char *str;
        const unsigned char *ctrl = (const unsigned char *)control;

        /* 256-bit set of delimiters: the upper five bits of a byte pick
         * the map element, the lower three bits pick the bit in it */
        unsigned char map[32];
        int count;

        /* Clear control map */
        for (count = 0; count < 32; count++)
                map[count] = 0;

        /* Set bits in delimiter table.  The do/while also records the
         * terminating NUL of control, so bit 0 of map[0] ends up set. */
        do {
                map[*ctrl >> 3] |= (1 << (*ctrl & 7));
        } while (*ctrl++);

        /* Initialize str */

        /* If string is NULL, set str to the saved
         * pointer (i.e., continue breaking tokens out of the string
         * from the last strtok call) */
        if (string)
                str = (unsigned char *)string;
        else
                str = (unsigned char *)_TOKEN;

        /* Continuation requested before any string was supplied:
         * there is nothing to scan, and str must not be dereferenced. */
        if (str == NULL)
                return NULL;

        /* Find beginning of token (skip over leading delimiters). Note that
         * there is no token iff this loop sets str to point to the terminal
         * null (*str == '\0').  The "&& *str" test is required because the
         * NUL itself is marked in map (see above). */
        while ( (map[*str >> 3] & (1 << (*str & 7))) && *str )
                str++;

        /* No explicit "*str == '\0'" check is needed here: in that case
         * the loop below does not run and NULL is returned at the end. */
        string = (char*)str;            /* start of the token */

        /* Find the end of the token. If it is not the end of the string,
         * put a null there. */
        for ( ; *str ; str++ )
                if ( map[*str >> 3] & (1 << (*str & 7)) ) {
                        /* first delimiter after the token: terminate the
                         * token and step past the delimiter */
                        *str++ = '\0';
                        break;
                }

        /* If no delimiter was found, str now points at the terminating
         * NUL and this was the last token. */

        /* Update the saved position for the next call */
        _TOKEN = (char*)str;

        /* Determine if a token has been found.  str has moved past string
         * iff the for loop above ran at least once, i.e. iff the skip
         * loop did not stop on the terminating NUL. */
        if ( (unsigned char *)string == str )
                return NULL;
        else
                return string;
}

这是VC++ C运行时库函数原型实现。

/*
 * strpbrk4 - return a pointer to the first character of string that also
 * occurs in control, or NULL if there is no such character.
 *
 * The characters of control are first recorded in a 256-bit set (32
 * bytes): the upper five bits of a byte select the map element, the lower
 * three bits select the bit inside it.  Testing a character of string for
 * membership is then one index and one mask.  Both strings are read
 * through unsigned char pointers, so the index (*p >> 3) is always in
 * the range 0..31.
 *
 * The same map serves two sibling routines that differ only in the final
 * scan (the original listed them as alternative bodies, marked 1 and 2):
 *
 *   1. strspn4  - count the leading characters of string that ARE in the
 *                 set:
 *                     while (map[*str >> 3] & (1 << (*str & 7)))
 *                             { count++; str++; }
 *                 The scan stops at the terminating NUL by itself,
 *                 because bit 0 of map[0] is never set.
 *
 *   2. strcspn4 - count the leading characters that are NOT in the set.
 *                 Either test *str in the loop condition, or execute
 *                 "map[0] |= 1;" first so that the terminating NUL counts
 *                 as a member and ends
 *                     while (!(map[*str >> 3] & (1 << (*str & 7))))
 *                             { count++; str++; }
 *                 Without that extra bit the loop would run past the end
 *                 of string.
 */
char * __cdecl strpbrk4 (
const char * string,const char * control)
{
	const unsigned char *scan = (const unsigned char *)string;
	const unsigned char *set = (const unsigned char *)control;
	unsigned char map[32] = {0};

	/* record every character of control in the bit map */
	for ( ; *set != '\0'; set++)
		map[*set >> 3] |= (1 << (*set & 7));

	/* return the first character of string whose bit is set */
	for ( ; *scan != '\0'; scan++)
		if (map[*scan >> 3] & (1 << (*scan & 7)))
			return((char *)scan);

	return(NULL);
}

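上面是VC++ C运行时库函数原型的实现。map就相当于一个集合（一个 256 位的位图，每个可能的字符值占一位），所有涉及集合的表示都可以用这种方法，牺牲空间换取时间。为什么用unsigned char* 呢？因为 char 可能是有符号类型：值大于 127 的字符会被当成负数，*str >> 3 就会得到负的下标，从而越界访问 map；转换成 unsigned char 之后取值范围是 0～255，下标 *str >> 3 恰好落在 0～31 之内。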