看看str系列函数的实现 一

很多面试题都会提到自己来实现一个函数,strcmp,strcpy,strstr……
我们来看一下以下函数的实现:

/* Compare the NUL-terminated strings P1 and P2.

   Returns zero when the strings are equal, a negative value when P1
   sorts before P2, and a positive value when P1 sorts after P2.  The
   result is the difference between the first pair of bytes that differ,
   with both bytes interpreted as unsigned char.  */
int
strcmp (const char *p1, const char *p2)
{
  /* `register` only suggests that the compiler keep the variable in a
     register.  `const` states that the pointed-to string is not
     modified.  `unsigned char` makes every byte compare as a value in
     0..255, so the result does not depend on whether plain char is
     signed; it has nothing to do with Unicode support.  */
  register const unsigned char *s1 = (const unsigned char *) p1;
  register const unsigned char *s2 = (const unsigned char *) p2;
  unsigned char c1, c2;

  do
    {
      c1 = *s1++;
      c2 = *s2++;
      /* P1 has ended: the difference is zero if P2 ended at the same
         place and negative otherwise, so it can be returned directly.  */
      if (c1 == '\0')
        return c1 - c2;
    }
  while (c1 == c2);

  /* The loop stopped at the first pair of bytes that differ.  */
  return c1 - c2;
}
早上偶想了一上午看了这个代码才知道差距所在,do while 语句莫有想到 c1-c2 也莫有想到.

下面来看个稍微复杂点的,strstr,偶们公司面试经常出这道题,不少所谓的英雄好汉折戟沉沙于此,其实不追求效率的话,
两个for循环也可以搞定,追求效率就要看这个了.

/* Scratch type used to hold a single character value.  */
typedef unsigned chartype;

/* Find the first occurrence of the string PNEEDLE inside PHAYSTACK.

   Returns a pointer to the start of the match, PHAYSTACK itself when
   PNEEDLE is empty, or a null pointer when there is no match.

   The search is a hand-tuned backtracking scan: it first looks for the
   first two needle characters (B and C) in the haystack and only then
   compares the remainder of the needle.  */
char *
strstr (const char *phaystack, const char *pneedle)
{
  const unsigned char *haystack, *needle;
  chartype b;                   /* first needle character */
  const unsigned char *rneedle; /* needle position to restore on mismatch */

  haystack = (const unsigned char *) phaystack;

  if ((b = *(needle = (const unsigned char *) pneedle)))
    {
      chartype c;               /* second needle character */

      /* Advance HAYSTACK to the first byte equal to B.  The loop starts
         at the first byte instead of decrementing HAYSTACK and
         pre-incrementing, so no pointer before the start of the string
         is ever formed.  */
      {
        chartype a;

        for (;;)
          {
            if (!(a = *haystack))       /* haystack ended without seeing B */
              goto ret0;
            if (a == b)
              break;
            ++haystack;
          }
      }

      if (!(c = *++needle))     /* needle is one character long: found */
        goto foundneedle;
      ++needle;                 /* C keeps the second character, so NEEDLE
                                   can already point at the third one */
      goto jin;

      for (;;)
        {
          {
            chartype a;

            /* The `if (0)` body is never entered by falling into it; it
               is reached only through `goto jin` on the first pass.  */
            if (0)
            jin:{
                if ((a = *++haystack) == c)     /* second character matches */
                  goto crest;
              }
            else
              a = *++haystack;
            do
              {
                /* Look for B (first needle character), testing two
                   haystack bytes per iteration.  */
                for (; a != b; a = *++haystack)
                  {
                    if (!a)
                      goto ret0;
                    if ((a = *++haystack) == b)
                      break;
                    if (!a)
                      goto ret0;
                  }
              }
            while ((a = *++haystack) != c);     /* leaving the loop means the
                                                   second character matched too */
          }
        crest:
          /* The first two characters match; compare from the third on.  */
          {
            chartype a;

            {
              const unsigned char *rhaystack;

              /* HAYSTACK steps back onto the first matched character,
                 RHAYSTACK starts at the third one.  */
              if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
                do
                  {
                    if (!a)     /* end of needle reached: full match */
                      goto foundneedle;
                    if (*++rhaystack != (a = *++needle))
                      break;    /* mismatch: stop comparing */
                    if (!a)     /* end of needle reached: full match */
                      goto foundneedle;
                  }
                while (*++rhaystack == (a = *++needle));
              /* Comparison failed: backtrack NEEDLE to its third
                 character ("took the register-poor approach").  */
              needle = rneedle;
            }
            /* A is zero when the needle had exactly two characters; the
               break leaves the loop and falls through to foundneedle.  */
            if (!a)
              break;
          }
        }
    }
foundneedle:
  return (char *) haystack;
ret0:
  return 0;
}

看完这段代码,莫有惊喜,从作者自信满满的口气中(Until someone tells me otherwise, I assume that this is the
fastest implementation of strstr() in C),我还以为至少效率会超过kmp的,看完以后,这只不过是个精巧的回溯算法罢了,
多次用到的 goto 也莫有什么特色,不过这个人很有意思的一点是不写注释也就罢了,还要故意说出来(I deliberately chose not to comment it.  You should have at least
 as much fun trying to understand it, as I had to write it :-).) 鄙视你
 
 附另一个人写的,貌似加州大学的一个教授应该是solaris上的实现
 

/* Locate the first occurrence of SUBSTRING in STRING.

   Returns a pointer to the first character of the match, STRING itself
   when SUBSTRING is empty, or a null pointer when SUBSTRING does not
   occur.  The parameters are const-qualified so the definition agrees
   with the ISO C declaration of strstr and accepts const strings.  */
char *
strstr (const char *string, const char *substring)
{
  const char *a, *b;

  /* First scan quickly through the two strings looking for a
     single-character match.  When it's found, then compare the
     rest of the substring.  */

  b = substring;
  if (*b == 0)
    {
      /* An empty substring matches at the very start of STRING.  */
      return (char *) string;
    }
  for (; *string != 0; string += 1)
    {
      if (*string != *b)
        {
          /* First character differs: try the next position.  */
          continue;
        }
      a = string;
      while (1)
        {
          if (*b == 0)
            {
              /* Reached the end of SUBSTRING: everything matched.  */
              return (char *) string;
            }
          if (*a++ != *b++)
            {
              /* Mismatch: give up on this starting position.  */
              break;
            }
        }
      /* Rewind to the start of SUBSTRING for the next attempt.  */
      b = substring;
    }
  return (char *) 0;
}

这个算法看上去方方正正,冲正平和,让人一看上去就觉得程序就应该是这么写的,而你自己放手去写却写不出这个味道来,所谓大巧
不工,莫过于此.
同是回溯算法,我真的不认为第一个除了神叨叨的故弄玄虚了半天,效率上有多少提升. 

再来看看strlen,偶自信满满的写了一个先:

/* Return the number of bytes in STR that precede its terminating NUL.  */
size_t strlen(const char *str)
{
  /* Same pointer type as STR, so the subtraction below is well-formed.  */
  const char *s = str;

  /* Walk forward until the terminator is reached.  */
  while (*s != '\0')
    s++;

  return (size_t) (s - str);
}

 

自我感觉相当良好,再看看人家写的,完全不懂! 我靠 偶整整落后一百年!

 /* Return the length of the null-terminated string STR.  Scan for
   the null terminator quickly by testing four bytes at a time.  */
size_t
strlen (str)
     const char *str;
{
  const char *char_ptr;
  const unsigned long int *longword_ptr;
  unsigned long int longword, magic_bits, himagic, lomagic;

  /* Handle the first few characters by reading one character at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.  */
  for (char_ptr = str; ((unsigned long int) char_ptr
   & (sizeof (longword) - 1)) != 0;
       ++char_ptr)
    if (*char_ptr == '/0')
      return char_ptr - str;

  /* All these elucidatory comments refer to 4-byte longwords,
     but the theory applies equally well to 8-byte longwords.  */

  longword_ptr = (unsigned long int *) char_ptr;

  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
     the "holes."  Note that there is a hole just to the left of
     each byte, with an extra at the end:

     bits:  01111110 11111110 11111110 11111111
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD

     The 1-bits make sure that carries propagate to the next 0-bit.
     The 0-bits provide holes for carries to fall into.  */
  magic_bits = 0x7efefeffL;
  himagic = 0x80808080L;
  lomagic = 0x01010101L;
  if (sizeof (longword) > 4)
    {
      /* 64-bit version of the magic.  */
      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
      magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL;
      himagic = ((himagic << 16) << 16) | himagic;
      lomagic = ((lomagic << 16) << 16) | lomagic;
    }
  if (sizeof (longword) > 8)
    abort ();

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     if *any of the four* bytes in the longword in question are zero.  */
  for (;;)
    {
      /* We tentatively exit the loop if adding MAGIC_BITS to
  LONGWORD fails to change any of the hole bits of LONGWORD.

  1) Is this safe?  Will it catch all the zero bytes?
  Suppose there is a byte with all zeros.  Any carry bits
  propagating from its left will fall into the hole at its
  least significant bit and stop.  Since there will be no
  carry from its most significant bit, the LSB of the
  byte to the left will be unchanged, and the zero will be
  detected.

  2) Is this worthwhile?  Will it ignore everything except
  zero bytes?  Suppose every byte of LONGWORD has a bit set
  somewhere.  There will be a carry into bit 8.  If bit 8
  is set, this will carry into bit 16.  If bit 8 is clear,
  one of bits 9-15 must be set, so there will be a carry
  into bit 16.  Similarly, there will be a carry into bit
  24.  If one of bits 24-30 is set, there will be a carry
  into bit 31, so all of the hole bits will be changed.

  The one misfire occurs when bits 24-30 are clear and bit
  31 is set; in this case, the hole at bit 31 is not
  changed.  If we had access to the processor carry flag,
  we could close this loophole by putting the fourth hole
  at bit 32!

  So it ignores everything except 128's, when they're aligned
  properly.  */

      longword = *longword_ptr++;

      if (
#if 0
   /* Add MAGIC_BITS to LONGWORD.  */
   (((longword + magic_bits)

     /* Set those bits that were unchanged by the addition.  */
     ^ ~longword)

    /* Look at only the hole bits.  If any of the hole bits
       are unchanged, most likely one of the bytes was a
       zero.  */
    & ~magic_bits)
#else
   ((longword - lomagic) & himagic)
#endif
   != 0)
 {
   /* Which of the bytes was the zero?  If none of them were, it was
      a misfire; continue the search.  */

   const char *cp = (const char *) (longword_ptr - 1);

   if (cp[0] == 0)
     return cp - str;
   if (cp[1] == 0)
     return cp - str + 1;
   if (cp[2] == 0)
     return cp - str + 2;
   if (cp[3] == 0)
     return cp - str + 3;
   if (sizeof (longword) > 4)
     {
       if (cp[4] == 0)
  return cp - str + 4;
       if (cp[5] == 0)
  return cp - str + 5;
       if (cp[6] == 0)
  return cp - str + 6;
       if (cp[7] == 0)
  return cp - str + 7;
     }
 }
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值