glibc 系列之strlen（）函数

最新推荐文章于 2022-11-02 11:59:45 发布

一个充满激情的猴子

最新推荐文章于 2022-11-02 11:59:45 发布

阅读量599

点赞数 1

分类专栏： glibc 文章标签： strlen glibc

本文链接：https://blog.csdn.net/u010540535/article/details/74937788

版权

glibc 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

glibc系列之strlen（）函数学习。

首先奉上的是strlen的源代码：

/* Return the length of the NUL-terminated string STR, i.e. the number of
   characters that precede the terminating '\0'.

   The return type is size_t and the argument is a pointer to a char array.
   The string is scanned one `unsigned long int' at a time once the pointer
   has been aligned, so up to sizeof (unsigned long int) - 1 bytes located
   after the terminator (but inside the same aligned longword) are read.  */
size_t
STRLEN (const char *str)
{
  const char *char_ptr;
  const unsigned long int *longword_ptr;
  unsigned long int longword, himagic, lomagic;

  /* Handle the first few characters by reading one character at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.  */
  for (char_ptr = str; ((unsigned long int) char_ptr
                        & (sizeof (longword) - 1)) != 0;
       ++char_ptr)
    if (*char_ptr == '\0')
      return char_ptr - str;

  /* All these elucidatory comments refer to 4-byte longwords,
     but the theory applies equally well to 8-byte longwords.  */

  longword_ptr = (const unsigned long int *) char_ptr;

  /* The two magic numbers are the heart of the algorithm:

       himagic: 10000000 10000000 10000000 10000000   (top bit of each byte)
       lomagic: 00000001 00000001 00000001 00000001   (low bit of each byte)

     Subtracting LOMAGIC makes every zero byte borrow, which sets its top
     bit; `& ~longword' discards bytes whose top bit was already set, and
     `& himagic' keeps only the per-byte top bits.  */
  himagic = 0x80808080L;
  lomagic = 0x01010101L;
  if (sizeof (longword) > 4)
    {
      /* 64-bit version of the magic.  */
      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
      himagic = ((himagic << 16) << 16) | himagic;
      lomagic = ((lomagic << 16) << 16) | lomagic;
    }
  if (sizeof (longword) > 8)
    abort ();

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     if *any of the bytes* in the longword in question are zero.  */
  for (;;)
    {
      longword = *longword_ptr++;

      /* Non-zero when one of the bytes of LONGWORD is 0x00; only then do
         we look at the individual bytes.  */
      if (((longword - lomagic) & ~longword & himagic) != 0)
        {
          /* Which of the bytes was the zero?  If none of them were, it was
             a misfire; continue the search.  */

          const char *cp = (const char *) (longword_ptr - 1);

          if (cp[0] == 0)
            return cp - str;
          if (cp[1] == 0)
            return cp - str + 1;
          if (cp[2] == 0)
            return cp - str + 2;
          if (cp[3] == 0)
            return cp - str + 3;
          if (sizeof (longword) > 4)
            {
              if (cp[4] == 0)
                return cp - str + 4;
              if (cp[5] == 0)
                return cp - str + 5;
              if (cp[6] == 0)
                return cp - str + 6;
              if (cp[7] == 0)
                return cp - str + 7;
            }
        }
    }
}

下面是代码解析

首先给出俩例子来形象的说明这个问题。
1
himagic = 0x80808080L; 1000-0000 1000-0000 1000-0000 1000-0000
lomagic = 0x01010101L; 0000-0001 0000-0001 0000-0001 0000-0001
((longword - lomagic) & ~longword & himagic) != 0
举个例子来说明这个问题：
longword = 000000ff
a =(longword - lomagic) =fefefffe
b = ~longword = ffffff00
a&b = fefeff00
himagic = 80808080
a&b&himagic 80808000

2
另一个例子： longword =00000000
a =(longword - lomagic) =fefefeff

b = ~longword =ffffffff

a&b = fefefeff

himagic = 80808080

a&b&himagic = 80808080

其实最主要的是if (((longword - lomagic) & ~longword & himagic) != 0) 怎么理解的。

首先考虑到ASCII码的最高位是0。~longword & himagic运算的结果是看看哪些字节的最高位是0：最高位为0的字节得到0x80，最高位为1的字节得到0x00；如果所有字节的最高位都是0，则运算结果是0x80808080。
longword - lomagic 运算的结果是：一旦longword中有一个字节为0，该字节减1后就会因借位变成0xff（如果更低的字节也产生了借位，则为0xfe），其最高位变为1。再与~longword & himagic相与，结果就不为0，也就是找到了‘\0’，进入了if体里。

下面是我自己写的strlen（）函数。

#define ulint unsigned long int  /* machine word used to scan several bytes at once */
/*
 * Return the number of characters in the NUL-terminated string `str`,
 * not counting the terminator.
 *
 * The leading bytes are examined one at a time until the pointer is aligned
 * to sizeof(ulint); after that the string is read one ulint at a time and
 * each word is tested for a zero byte with the same bit trick glibc uses.
 * Because whole words are read, bytes after the terminator that lie inside
 * the same aligned word are read as well.
 *
 * The result is returned as unsigned int, as in the original interface.
 */
unsigned int
mglStrLen(const char * str)
{
    const char *tempStr;
    const ulint *longwordPtr;
    /* 0x01 and 0x80 repeated in every byte of a ulint, whatever its width. */
    const ulint lomagic = (ulint)-1 / 0xFF;
    const ulint himagic = lomagic * 0x80;
    unsigned int i;

    /* Align: the parentheses around the `&` are required, because `!=`
       binds tighter than `&`. */
    for (tempStr = str; ((ulint)tempStr & (sizeof(ulint) - 1)) != 0; tempStr++)
    {
        if (*tempStr == '\0')
            return (unsigned int)(tempStr - str);
    }

    /* Calculate the length one word at a time.  The pointer is advanced
       only after the current word has been tested, so the first aligned
       word is not skipped. */
    for (longwordPtr = (const ulint *)tempStr; ; longwordPtr++)
    {
        const ulint word = *longwordPtr;

        /* Non-zero when some byte of `word` is 0x00. */
        if (((word - lomagic) & ~word & himagic) != 0)
        {
            /* Inspect every byte of the word, not just the first four. */
            const char *bytes = (const char *)longwordPtr;

            for (i = 0; i < sizeof(ulint); i++)
            {
                if (bytes[i] == '\0')
                    return (unsigned int)(bytes - str) + i;
            }
        }
    }
}
void main()
{
    char str[] = "mglhahaahhahah";  ---->14个字节
    int len = mglStrLen(str);
    cout << "mgl  " <<len<< endl;

}