全面深入介绍C语言字符串函数
----刘黎明(liuliming2008@126.com)
1 函数头文件
2 函数实现源代码
3 后记
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
有人说,C语言就是提供了函数/结构体/指针,可是语言库确实相当重要, 特别是字符串库。
1 函数头文件
怎么用就不用介绍了,帮你回忆一下:
extern char * strcpy(char *,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//__kernel_size_t:最多复制__kernel_size_t个字节
extern char * strncpy(char *,const char *, __kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//与strncpy略有不同
size_t strlcpy(char *, const char *, size_t);
extern char * strcat(char *, const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//最多追加__kernel_size_t个字符到src
extern char * strncat(char *, const char *, __kernel_size_t);
extern size_t strlcat(char *, const char *, __kernel_size_t);
extern int strcmp(const char *,const char *);
extern int strncmp(const char *,const char *,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//不管大小写的比较
extern int strnicmp(const char *, const char *, __kernel_size_t);
extern char * strchr(const char *,int);
extern char * strnchr(const char *, size_t, int);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//最后一次出现int的地址
extern char * strrchr(const char *,int);
extern char * strstr(const char *,const char *);
extern __kernel_size_t strlen(const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//获得最长不超过__kernel_size_t长度的字符串的长度
extern __kernel_size_t strnlen(const char *,__kernel_size_t);
//在源字符串(source-string)中找出最先含有搜索字符串(searching-string)中的任一字符的位置并返回,若找不到则返回空指针
extern char * strpbrk(const char *,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//返回传入的char **, char **指向传入的字符串中被const char *分隔开的下部分字符串
extern char * strsep(char **,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//传回str1最初的部份的长度,这个部份是完全地由str2中的字符所构成的
extern __kernel_size_t strspn(const char *,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//传回str1最初的部份的长度,这个部份没有str2中的任何字符
extern __kernel_size_t strcspn(const char *,const char *);
extern void * memset(void *,int,__kernel_size_t);
extern void * memcpy(void *,const void *,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//如果源和目的参数有重叠, memmove() 提供有保证的行为。而 memcpy() 则不能提供这样的保证, 因此可以实现得更加有效率。如果有疑问, 最好使用 memmove()。memmove可以把自己的一部分拷贝给自己的另一部分。其他函数不行
extern void * memmove(void *,const void *,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//在长度为__kernel_size_t的内存中查找int
extern void * memscan(void *,int,__kernel_size_t);
extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->
//申请内存,复制s到该内存
extern char *kstrdup(const char *s, gfp_t gfp);
2 函数实现源代码
/*
* linux/lib/string.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/*
* stupid library routines.. The optimized versions should generally be found
* as inline code in <asm-xx/string.h>
*
* These are buggy as well..
*
* * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de>
* - Added strsep() which will replace strtok() soon (because strsep() is
* reentrant and should be faster). Use only strsep() in new code, please.
*
* * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>,
* Matthew Hawkins <matt@mh.dropbear.id.au>
* - Kissed strtok() goodbye
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/module.h>
#ifndef __HAVE_ARCH_STRNICMP
/**
* strnicmp - Case insensitive, length-limited string comparison
* @s1: One string
* @s2: The other string
* @len: the maximum number of characters to compare
*/
int strnicmp(const char *s1, const char *s2, size_t len)
{
/* Yes, Virginia , it had better be unsigned */
unsigned char c1, c2;
c1 = c2 = 0;
if (len) {
do {
c1 = *s1;
c2 = *s2;
s1++;
s2++;
if (!c1)
break;
if (!c2)
break;
if (c1 == c2)
continue;
c1 = tolower(c1);
c2 = tolower(c2);
if (c1 != c2)
break;
} while (--len);
}
return (int)c1 - (int)c2;
}
EXPORT_SYMBOL(strnicmp);
#endif
#ifndef __HAVE_ARCH_STRCPY
/**
* strcpy - Copy a %NUL terminated string
* @dest: Where to copy the string to
* @src: Where to copy the string from
*/
#undef strcpy
char *strcpy(char *dest, const char *src)
{
char *tmp = dest;
while ((*dest++ = *src++) != '/0')
/* nothing */;
return tmp;
}
EXPORT_SYMBOL(strcpy);
#endif
#ifndef __HAVE_ARCH_STRNCPY
/**
* strncpy - Copy a length-limited, %NUL-terminated string
* @dest: Where to copy the string to
* @src: Where to copy the string from
* @count: The maximum number of bytes to copy
*
* The result is not %NUL-terminated if the source exceeds
* @count bytes.
*
* In the case where the length of @src is less than that of
* count, the remainder of @dest will be padded with %NUL.
*
*/
char *strncpy(char *dest, const char *src, size_t count)
{
char *tmp = dest;
while (count) {
if ((*tmp = *src) != 0)
src++;
tmp++;
count--;
}
return dest;
}
EXPORT_SYMBOL(strncpy);
#endif
#ifndef __HAVE_ARCH_STRLCPY
/**
* strlcpy - Copy a %NUL terminated string into a sized buffer
* @dest: Where to copy the string to
* @src: Where to copy the string from
* @size: size of destination buffer
*
* Compatible with *BSD: the result is always a valid
* NUL-terminated string that fits in the buffer (unless,
* of course, the buffer size is zero). It does not pad
* out the result like strncpy() does.
*/
size_t strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
if (size) {
size_t len = (ret >= size) ? size - 1 : ret;
memcpy(dest, src, len);
dest[len] = '/0';
}
return ret;
}
EXPORT_SYMBOL(strlcpy);
#endif
#ifndef __HAVE_ARCH_STRCAT
/**
* strcat - Append one %NUL-terminated string to another
* @dest: The string to be appended to
* @src: The string to append to it
*/
#undef strcat
char *strcat(char *dest, const char *src)
{
char *tmp = dest;
while (*dest)
dest++;
while ((*dest++ = *src++) != '/0')
;
return tmp;
}
EXPORT_SYMBOL(strcat);
#endif
#ifndef __HAVE_ARCH_STRNCAT
/**
* strncat - Append a length-limited, %NUL-terminated string to another
* @dest: The string to be appended to
* @src: The string to append to it
* @count: The maximum numbers of bytes to copy
*
* Note that in contrast to strncpy, strncat ensures the result is
* terminated.
*/
char *strncat(char *dest, const char *src, size_t count)
{
char *tmp = dest;
if (count) {
while (*dest)
dest++;
while ((*dest++ = *src++) != 0) {
if (--count == 0) {
*dest = '/0';
break;
}
}
}
return tmp;
}
EXPORT_SYMBOL(strncat);
#endif
#ifndef __HAVE_ARCH_STRLCAT
/**
* strlcat - Append a length-limited, %NUL-terminated string to another
* @dest: The string to be appended to
* @src: The string to append to it
* @count: The size of the destination buffer.
*/
size_t strlcat(char *dest, const char *src, size_t count)
{
size_t dsize = strlen(dest);
size_t len = strlen(src);
size_t res = dsize + len;
/* This would be a bug */
BUG_ON(dsize >= count);
dest += dsize;
count -= dsize;
if (len >= count)
len = count-1;
memcpy(dest, src, len);
dest[len] = 0;
return res;
}
EXPORT_SYMBOL(strlcat);
#endif
#ifndef __HAVE_ARCH_STRCMP
/**
* strcmp - Compare two strings
* @cs: One string
* @ct: Another string
*/
#undef strcmp
int strcmp(const char *cs, const char *ct)
{
signed char __res;
while (1) {
if ((__res = *cs - *ct++) != 0 || !*cs++)
break;
}
return __res;
}
EXPORT_SYMBOL(strcmp);
#endif
#ifndef __HAVE_ARCH_STRNCMP
/**
* strncmp - Compare two length-limited strings
* @cs: One string
* @ct: Another string
* @count: The maximum number of bytes to compare
*/
int strncmp(const char *cs, const char *ct, size_t count)
{
signed char __res = 0;
while (count) {
if ((__res = *cs - *ct++) != 0 || !*cs++)
break;
count--;
}
return __res;
}
EXPORT_SYMBOL(strncmp);
#endif
#ifndef __HAVE_ARCH_STRCHR
/**
* strchr - Find the first occurrence of a character in a string
* @s: The string to be searched
* @c: The character to search for
*/
char *strchr(const char *s, int c)
{
for (; *s != (char)c; ++s)
if (*s == '/0')
return NULL;
return (char *)s;
}
EXPORT_SYMBOL(strchr);
#endif
#ifndef __HAVE_ARCH_STRRCHR
/**
* strrchr - Find the last occurrence of a character in a string
* @s: The string to be searched
* @c: The character to search for
*/
char *strrchr(const char *s, int c)
{
const char *p = s + strlen(s);
do {
if (*p == (char)c)
return (char *)p;
} while (--p >= s);
return NULL;
}
EXPORT_SYMBOL(strrchr);
#endif
#ifndef __HAVE_ARCH_STRNCHR
/**
* strnchr - Find a character in a length limited string
* @s: The string to be searched
* @count: The number of characters to be searched
* @c: The character to search for
*/
char *strnchr(const char *s, size_t count, int c)
{
for (; count-- && *s != '/0'; ++s)
if (*s == (char)c)
return (char *)s;
return NULL;
}
EXPORT_SYMBOL(strnchr);
#endif
/**
* strstrip - Removes leading and trailing whitespace from @s.
* @s: The string to be stripped.
*
* Note that the first trailing whitespace is replaced with a %NUL-terminator
* in the given string @s. Returns a pointer to the first non-whitespace
* character in @s.
*/
char *strstrip(char *s)
{
size_t size;
char *end;
size = strlen(s);
if (!size)
return s;
end = s + size - 1;
while (end != s && isspace(*end))
end--;
*(end + 1) = '/0';
while (*s && isspace(*s))
s++;
return s;
}
EXPORT_SYMBOL(strstrip);
#ifndef __HAVE_ARCH_STRLEN
/**
* strlen - Find the length of a string
* @s: The string to be sized
*/
size_t strlen(const char *s)
{
const char *sc;
for (sc = s; *sc != '/0'; ++sc)
/* nothing */;
return sc - s;
}
EXPORT_SYMBOL(strlen);
#endif
#ifndef __HAVE_ARCH_STRNLEN
/**
* strnlen - Find the length of a length-limited string
* @s: The string to be sized
* @count: The maximum number of bytes to search
*/
size_t strnlen(const char *s, size_t count)
{
const char *sc;
for (sc = s; count-- && *sc != '/0'; ++sc)
/* nothing */;
return sc - s;
}
EXPORT_SYMBOL(strnlen);
#endif
#ifndef __HAVE_ARCH_STRSPN
/**
* strspn - Calculate the length of the initial substring of @s which only
* contain letters in @accept
* @s: The string to be searched
* @accept: The string to search for
*/
size_t strspn(const char *s, const char *accept)
{
const char *p;
const char *a;
size_t count = 0;
for (p = s; *p != '/0'; ++p) {
for (a = accept; *a != '/0'; ++a) {
if (*p == *a)
break;
}
if (*a == '/0')
return count;
++count;
}
return count;
}
EXPORT_SYMBOL(strspn);
#endif
#ifndef __HAVE_ARCH_STRCSPN
/**
* strcspn - Calculate the length of the initial substring of @s which does
* not contain letters in @reject
* @s: The string to be searched
* @reject: The string to avoid
*/
size_t strcspn(const char *s, const char *reject)
{
const char *p;
const char *r;
size_t count = 0;
for (p = s; *p != '/0'; ++p) {
for (r = reject; *r != '/0'; ++r) {
if (*p == *r)
return count;
}
++count;
}
return count;
}
EXPORT_SYMBOL(strcspn);
#endif
#ifndef __HAVE_ARCH_STRPBRK
/**
* strpbrk - Find the first occurrence of a set of characters
* @cs: The string to be searched
* @ct: The characters to search for
*/
char *strpbrk(const char *cs, const char *ct)
{
const char *sc1, *sc2;
for (sc1 = cs; *sc1 != '/0'; ++sc1) {
for (sc2 = ct; *sc2 != '/0'; ++sc2) {
if (*sc1 == *sc2)
return (char *)sc1;
}
}
return NULL;
}
EXPORT_SYMBOL(strpbrk);
#endif
#ifndef __HAVE_ARCH_STRSEP
/**
* strsep - Split a string into tokens
* @s: The string to be searched
* @ct: The characters to search for
*
* strsep() updates @s to point after the token, ready for the next call.
*
* It returns empty tokens, too, behaving exactly like the libc function
* of that name. In fact, it was stolen from glibc2 and de-fancy-fied.
* Same semantics, slimmer shape. ;)
*/
char *strsep(char **s, const char *ct)
{
char *sbegin = *s;
char *end;
if (sbegin == NULL)
return NULL;
end = strpbrk(sbegin, ct);
if (end)
*end++ = '/0';
*s = end;
return sbegin;
}
EXPORT_SYMBOL(strsep);
#endif
#ifndef __HAVE_ARCH_MEMSET
/**
* memset - Fill a region of memory with the given value
* @s: Pointer to the start of the area.
* @c: The byte to fill the area with
* @count: The size of the area.
*
* Do not use memset() to access IO space, use memset_io() instead.
*/
void *memset(void *s, int c, size_t count)
{
char *xs = s;
while (count--)
*xs++ = c;
return s;
}
EXPORT_SYMBOL(memset);
#endif
#ifndef __HAVE_ARCH_MEMCPY
/**
* memcpy - Copy one area of memory to another
* @dest: Where to copy to
* @src: Where to copy from
* @count: The size of the area.
*
* You should not use this function to access IO space, use memcpy_toio()
* or memcpy_fromio() instead.
*/
void *memcpy(void *dest, const void *src, size_t count)
{
char *tmp = dest;
const char *s = src;
while (count--)
*tmp++ = *s++;
return dest;
}
EXPORT_SYMBOL(memcpy);
#endif
#ifndef __HAVE_ARCH_MEMMOVE
/**
* memmove - Copy one area of memory to another
* @dest: Where to copy to
* @src: Where to copy from
* @count: The size of the area.
*
* Unlike memcpy(), memmove() copes with overlapping areas.
*/
void *memmove(void *dest, const void *src, size_t count)
{
char *tmp;
const char *s;
if (dest <= src) {
tmp = dest;
s = src;
while (count--)
*tmp++ = *s++;
} else {
tmp = dest;
tmp += count;
s = src;
s += count;
while (count--)
*--tmp = *--s;
}
return dest;
}
EXPORT_SYMBOL(memmove);
#endif
#ifndef __HAVE_ARCH_MEMCMP
/**
* memcmp - Compare two areas of memory
* @cs: One area of memory
* @ct: Another area of memory
* @count: The size of the area.
*/
#undef memcmp
int memcmp(const void *cs, const void *ct, size_t count)
{
const unsigned char *su1, *su2;
int res = 0;
for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
if ((res = *su1 - *su2) != 0)
break;
return res;
}
EXPORT_SYMBOL(memcmp);
#endif
#ifndef __HAVE_ARCH_MEMSCAN
/**
* memscan - Find a character in an area of memory.
* @addr: The memory area
* @c: The byte to search for
* @size: The size of the area.
*
* returns the address of the first occurrence of @c, or 1 byte past
* the area if @c is not found
*/
void *memscan(void *addr, int c, size_t size)
{
unsigned char *p = addr;
while (size) {
if (*p == c)
return (void *)p;
p++;
size--;
}
return (void *)p;
}
EXPORT_SYMBOL(memscan);
#endif
#ifndef __HAVE_ARCH_STRSTR
/**
* strstr - Find the first substring in a %NUL terminated string
* @s1: The string to be searched
* @s2: The string to search for
*/
char *strstr(const char *s1, const char *s2)
{
int l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}
EXPORT_SYMBOL(strstr);
#endif
#ifndef __HAVE_ARCH_MEMCHR
/**
* memchr - Find a character in an area of memory.
* @s: The memory area
* @c: The byte to search for
* @n: The size of the area.
*
* returns the address of the first occurrence of @c, or %NULL
* if @c is not found
*/
void *memchr(const void *s, int c, size_t n)
{
const unsigned char *p = s;
while (n-- != 0) {
if ((unsigned char)c == *p++) {
return (void *)(p - 1);
}
}
return NULL;
}
EXPORT_SYMBOL(memchr);
#endif
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/err.h>
#include <asm/uaccess.h>
/**
* __kzalloc - allocate memory. The memory is set to zero.
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*/
void *__kzalloc(size_t size, gfp_t flags)
{
void *ret = ____kmalloc(size, flags);
if (ret)
memset(ret, 0, size);
return ret;
}
EXPORT_SYMBOL(__kzalloc);
/*
* kstrdup - allocate space for and copy an existing string
*
* @s: the string to duplicate
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
*/
char *kstrdup(const char *s, gfp_t gfp)
{
size_t len;
char *buf;
if (!s)
return NULL;
len = strlen(s) + 1;
buf = ____kmalloc(len, gfp);
if (buf)
memcpy(buf, s, len);
return buf;
}
EXPORT_SYMBOL(kstrdup);
/*
* strndup_user - duplicate an existing string from user space
*
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
*/
char *strndup_user(const char __user *s, long n)
{
char *p;
long length;
length = strnlen_user(s, n);
if (!length)
return ERR_PTR(-EFAULT);
if (length > n)
return ERR_PTR(-EINVAL);
p = kmalloc(length, GFP_KERNEL);
if (!p)
return ERR_PTR(-ENOMEM);
if (copy_from_user(p, s, length)) {
kfree(p);
return ERR_PTR(-EFAULT);
}
p[length - 1] = '/0';
return p;
}
EXPORT_SYMBOL(strndup_user);
3 后记
Windows上的实现与平台相关性稍大,不好理解,虽然也很好用。
Linux下的string.c实现在/linuxxx.xx.xx/lib
String.h在/linuxxx.xx.xx/include/linux
汇编实现在/linuxxx.xx.xx/include/asm
是不是觉得string.c写的也不是那么完美,也许你能写出更好的。但是在你看源代码之前,你能写出这样的效率和简洁的实现么?
为什么没有参数检查?也许这是为高手实现的,高手是不会出现参数错误,呵呵,这当然是玩笑。可能是出于效率考虑吧,参数传入前自己检查好了。