[C语言]字符串处理 - 提取字符串指定部分(支持中文字符)
2008-11-5: 增加StringSubU(可处理中文字符串,以前的StringSub遇到中文会取到半个字符),为此编写了StrLenU和StrSetPosU两个函数。字符位置从1开始计数;如果要取的字符个数为-1等负值,则表示取从起始位置开始的全部剩余字符。注意:代码假定每个非ASCII字符占2个字节(例如GBK/GB2312编码),不适用于UTF-8等变长编码。
/*C代码如下*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*处理中文字符*/
/**
 * Count the characters in a NUL-terminated double-byte string.
 *
 * A byte with its high bit set is treated as the lead byte of a two-byte
 * character (the layout used by encodings such as GBK); any other byte is a
 * one-byte ASCII character. A lead byte that is directly followed by the
 * terminator is counted as one character.
 *
 * @param string  NUL-terminated string; NULL is treated as empty.
 * @return        Number of characters (not bytes).
 */
int StrLenU(const char* string)
{
    int len = 0;
    const char* p = string;

    if (p == NULL) {
        return 0;
    }
    while (*p != '\0') {
        /* Classify the CURRENT byte before stepping over it, and go through
         * unsigned char so the result does not depend on char signedness. */
        if (((unsigned char)*p & 0x80) != 0 && p[1] != '\0') {
            p++;    /* skip the trail byte of a two-byte character */
        }
        p++;
        len++;
    }
    return len;
}
/**
 * Locate the pos-th character (1-based) of a double-byte string.
 *
 * A byte with its high bit set is treated as the lead byte of a two-byte
 * character; any other byte is a one-byte ASCII character.
 *
 * @param string  NUL-terminated string (may be NULL).
 * @param pos     1-based character position.
 * @return        Pointer into string at the requested character. The start
 *                of the string is returned when pos is 1 or negative, and a
 *                pointer to the terminating NUL when pos lies past the end.
 *                NULL is returned when pos is 0 or string is NULL.
 */
char* StrSetPosU(const char* string,int pos)
{
    /* The interface hands back a non-const pointer into the caller's
     * buffer, so the qualifier is dropped explicitly here. */
    char* result = (char*)string;

    if (pos == 0) {
        return NULL;
    }
    while (result != NULL && *result != '\0' && pos > 1) {
        /* Only skip a trail byte that actually exists; otherwise a dangling
         * lead byte would carry the pointer past the terminator. */
        if (((unsigned char)*result & 0x80) != 0 && result[1] != '\0') {
            result++;
        }
        result++;
        pos--;
    }
    return result;
}
/**
 * Count the characters held in the first size bytes of a double-byte string.
 *
 * A byte with its high bit set is treated as the lead byte of a two-byte
 * character; any other byte is a one-byte ASCII character. Counting stops at
 * the terminating NUL or at the end of the window, whichever comes first.
 * A two-byte character whose second byte falls outside the window is not
 * counted, so only whole characters contribute to the result.
 *
 * @param string  Start of the buffer; NULL is treated as empty.
 * @param size    Number of bytes to examine; values <= 0 yield 0.
 * @return        Number of whole characters inside the window.
 */
int StrLenMemU(const char* string,int size)
{
    int len = 0;
    const char* p = string;

    if (p == NULL) {
        return 0;
    }
    /* Check the remaining size first so no byte outside the window is read. */
    while (size > 0 && *p != '\0') {
        int width = 1;

        if (((unsigned char)*p & 0x80) != 0) {
            if (size < 2) {
                break;      /* trail byte lies outside the window */
            }
            if (p[1] != '\0') {
                width = 2;  /* complete two-byte character */
            }
        }
        p += width;
        size -= width;
        len++;
    }
    return len;
}
/**
 * Extract a run of characters from a double-byte string into a new buffer.
 *
 * Positions and counts are in characters, not bytes: a byte with its high
 * bit set is treated as the lead byte of a two-byte character.
 *
 * @param string  NUL-terminated source string; NULL yields NULL.
 * @param start   1-based position of the first character to copy. Values
 *                below 1 are treated as 1.
 * @param number  Number of characters to copy. A negative value copies
 *                everything from start to the end of the string. A value
 *                larger than what remains prints a notice to stdout and
 *                copies what is left.
 * @return        Newly malloc'ed NUL-terminated substring that the caller
 *                must free(), or NULL when string is NULL, when start is
 *                beyond the string length (a notice is printed to stdout),
 *                or when allocation fails.
 */
char* StringSubU(const char* string,int start,int number)
{
    if (string == NULL) {
        return NULL;
    }

    int len = StrLenU(string);
    if (start > len) {
        printf( "Start %d is too big than string length %d! \n ",start,len);
        return NULL;
    }
    /* StrSetPosU returns NULL for position 0, which used to be dereferenced
     * below; clamp so 0 behaves like the negative values always did. */
    if (start < 1) {
        start = 1;
    }

    /* Move to the first character of the requested range. */
    const char* start_char = StrSetPosU(string, start);
    if (start_char == NULL) {
        return NULL;
    }
    const char* p = start_char;

    if (number < 0) {
        /* Negative count: take everything up to the terminator. */
        p += strlen(p);
    } else {
        int num = number;

        while (num > 0 && *p != '\0') {
            /* Non-ASCII character: consume its trail byte too, but never
             * step over the terminator after a dangling lead byte. */
            if (((unsigned char)*p & 0x80) != 0 && p[1] != '\0') {
                p++;
            }
            p++;
            num--;
        }
        /* The string ended before the requested count was reached. */
        if (num > 0) {
            printf( "Number : %d is to big! \n ",number);
        }
    }

    size_t bufsize = (size_t)(p - start_char);
    char* result = malloc(bufsize + 1);
    if (result == NULL) {
        return NULL;
    }
    memcpy(result, start_char, bufsize);
    result[bufsize] = '\0';
    return result;
}
/**
 * Demo driver: exercises the character-counting helpers and StringSubU on a
 * few sample strings and prints the results.
 *
 * NOTE(review): the expected output assumes the string literals are compiled
 * into a two-bytes-per-character execution charset - confirm the source
 * file encoding before relying on it.
 */
int main(void)
{
    const char* t = "a哈哈aab和c哈";
    char* sub;

    printf( "length: %d \n ",StrLenU( "哈哈a哈a哈"));
    printf( "指向前%s \n 指向后:%s \n ",t,StrSetPosU(t, 3));
    printf( "全字符时字符个数:%d \n ",StrLenMemU(t, 6));
    printf( "半个字符时字符个数:%d \n ",StrLenMemU(t, 4));

    /* StringSubU returns heap memory (or NULL on error): never hand NULL to
     * %s, and release every result after printing it. */
    sub = StringSubU( "a哈aa哈a", 1, 2);
    printf( "1.正常取值:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a",- 1, 2);
    printf( "2.负值取值:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a", 7, 2);
    printf( "3.起始值过大:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a", 5, 3);
    printf( "4.取值过大:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a", 4,- 1);
    printf( "5.负值取全部:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    return 0;
}
#include <stdio.h>
/*处理中文字符*/
/**
 * Count the characters in a NUL-terminated double-byte string.
 *
 * A byte with its high bit set is treated as the lead byte of a two-byte
 * character (the layout used by encodings such as GBK); any other byte is a
 * one-byte ASCII character. A lead byte that is directly followed by the
 * terminator is counted as one character.
 *
 * @param string  NUL-terminated string; NULL is treated as empty.
 * @return        Number of characters (not bytes).
 */
int StrLenU(const char* string)
{
    int len = 0;
    const char* p = string;

    if (p == NULL) {
        return 0;
    }
    while (*p != '\0') {
        /* Classify the CURRENT byte before stepping over it, and go through
         * unsigned char so the result does not depend on char signedness. */
        if (((unsigned char)*p & 0x80) != 0 && p[1] != '\0') {
            p++;    /* skip the trail byte of a two-byte character */
        }
        p++;
        len++;
    }
    return len;
}
/**
 * Locate the pos-th character (1-based) of a double-byte string.
 *
 * A byte with its high bit set is treated as the lead byte of a two-byte
 * character; any other byte is a one-byte ASCII character.
 *
 * @param string  NUL-terminated string (may be NULL).
 * @param pos     1-based character position.
 * @return        Pointer into string at the requested character. The start
 *                of the string is returned when pos is 1 or negative, and a
 *                pointer to the terminating NUL when pos lies past the end.
 *                NULL is returned when pos is 0 or string is NULL.
 */
char* StrSetPosU(const char* string,int pos)
{
    /* The interface hands back a non-const pointer into the caller's
     * buffer, so the qualifier is dropped explicitly here. */
    char* result = (char*)string;

    if (pos == 0) {
        return NULL;
    }
    while (result != NULL && *result != '\0' && pos > 1) {
        /* Only skip a trail byte that actually exists; otherwise a dangling
         * lead byte would carry the pointer past the terminator. */
        if (((unsigned char)*result & 0x80) != 0 && result[1] != '\0') {
            result++;
        }
        result++;
        pos--;
    }
    return result;
}
/**
 * Count the characters held in the first size bytes of a double-byte string.
 *
 * A byte with its high bit set is treated as the lead byte of a two-byte
 * character; any other byte is a one-byte ASCII character. Counting stops at
 * the terminating NUL or at the end of the window, whichever comes first.
 * A two-byte character whose second byte falls outside the window is not
 * counted, so only whole characters contribute to the result.
 *
 * @param string  Start of the buffer; NULL is treated as empty.
 * @param size    Number of bytes to examine; values <= 0 yield 0.
 * @return        Number of whole characters inside the window.
 */
int StrLenMemU(const char* string,int size)
{
    int len = 0;
    const char* p = string;

    if (p == NULL) {
        return 0;
    }
    /* Check the remaining size first so no byte outside the window is read. */
    while (size > 0 && *p != '\0') {
        int width = 1;

        if (((unsigned char)*p & 0x80) != 0) {
            if (size < 2) {
                break;      /* trail byte lies outside the window */
            }
            if (p[1] != '\0') {
                width = 2;  /* complete two-byte character */
            }
        }
        p += width;
        size -= width;
        len++;
    }
    return len;
}
/**
 * Extract a run of characters from a double-byte string into a new buffer.
 *
 * Positions and counts are in characters, not bytes: a byte with its high
 * bit set is treated as the lead byte of a two-byte character.
 *
 * @param string  NUL-terminated source string; NULL yields NULL.
 * @param start   1-based position of the first character to copy. Values
 *                below 1 are treated as 1.
 * @param number  Number of characters to copy. A negative value copies
 *                everything from start to the end of the string. A value
 *                larger than what remains prints a notice to stdout and
 *                copies what is left.
 * @return        Newly malloc'ed NUL-terminated substring that the caller
 *                must free(), or NULL when string is NULL, when start is
 *                beyond the string length (a notice is printed to stdout),
 *                or when allocation fails.
 */
char* StringSubU(const char* string,int start,int number)
{
    if (string == NULL) {
        return NULL;
    }

    int len = StrLenU(string);
    if (start > len) {
        printf( "Start %d is too big than string length %d! \n ",start,len);
        return NULL;
    }
    /* StrSetPosU returns NULL for position 0, which used to be dereferenced
     * below; clamp so 0 behaves like the negative values always did. */
    if (start < 1) {
        start = 1;
    }

    /* Move to the first character of the requested range. */
    const char* start_char = StrSetPosU(string, start);
    if (start_char == NULL) {
        return NULL;
    }
    const char* p = start_char;

    if (number < 0) {
        /* Negative count: take everything up to the terminator. */
        p += strlen(p);
    } else {
        int num = number;

        while (num > 0 && *p != '\0') {
            /* Non-ASCII character: consume its trail byte too, but never
             * step over the terminator after a dangling lead byte. */
            if (((unsigned char)*p & 0x80) != 0 && p[1] != '\0') {
                p++;
            }
            p++;
            num--;
        }
        /* The string ended before the requested count was reached. */
        if (num > 0) {
            printf( "Number : %d is to big! \n ",number);
        }
    }

    size_t bufsize = (size_t)(p - start_char);
    char* result = malloc(bufsize + 1);
    if (result == NULL) {
        return NULL;
    }
    memcpy(result, start_char, bufsize);
    result[bufsize] = '\0';
    return result;
}
/**
 * Demo driver: exercises the character-counting helpers and StringSubU on a
 * few sample strings and prints the results.
 *
 * NOTE(review): the expected output assumes the string literals are compiled
 * into a two-bytes-per-character execution charset - confirm the source
 * file encoding before relying on it.
 */
int main(void)
{
    const char* t = "a哈哈aab和c哈";
    char* sub;

    printf( "length: %d \n ",StrLenU( "哈哈a哈a哈"));
    printf( "指向前%s \n 指向后:%s \n ",t,StrSetPosU(t, 3));
    printf( "全字符时字符个数:%d \n ",StrLenMemU(t, 6));
    printf( "半个字符时字符个数:%d \n ",StrLenMemU(t, 4));

    /* StringSubU returns heap memory (or NULL on error): never hand NULL to
     * %s, and release every result after printing it. */
    sub = StringSubU( "a哈aa哈a", 1, 2);
    printf( "1.正常取值:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a",- 1, 2);
    printf( "2.负值取值:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a", 7, 2);
    printf( "3.起始值过大:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a", 5, 3);
    printf( "4.取值过大:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    sub = StringSubU( "a哈aa哈a", 4,- 1);
    printf( "5.负值取全部:%s \n ",sub != NULL ? sub : "(null)");
    free(sub);

    return 0;
}