String类型是.NET中极其重要且常用的基础数据类型,其内部数据结构实现如下:标识字符串长度的m_stringLength,以及字符串的第一个字符m_firstChar,注意类型是char。
// Excerpt of the decompiled String class declaration: only its leading fields and constants are shown.
public sealed class String : IComparable, ICloneable, IConvertible, IComparable<string>, IEnumerable<char>, IEnumerable, IEquatable<string>
{
// Length of the string (described by the accompanying text as the string-length marker).
[NonSerialized]
private int m_stringLength;
// First character of the string. The unsafe helpers shown later take its address
// (&m_firstChar) in a fixed statement to obtain a char* over the string's characters.
[ForceTokenStabilization]
[NonSerialized]
private char m_firstChar;
/// <summary>
/// Represents the empty string. This field is read-only.
/// </summary>
/// <filterpriority>1</filterpriority>
[__DynamicallyInvokable]
public static readonly string Empty;
// NOTE(review): presumably selectors for an internal trim helper (head / tail / both ends);
// their use is not visible in this excerpt — confirm against the full class.
private const int TrimHead = 0;
private const int TrimTail = 1;
private const int TrimBoth = 2;
// NOTE(review): alignment-related constants judging by their names; their use is not shown here.
private const int charPtrAlignConst = 1;
private const int alignConst = 3;
字符串的基本操作如下:
- Concat(string, string),连接两字符串
- Equals(string),判断两字符串是否值相等
- Remove(int,int),删除指定位置的字符串
- Replace(string,string),替换
- Split(char[] separator),分割成数组
- Substring(int,int),截取子串
- Trim(),去除首尾的空白字符
支撑这些函数的除了一些internal函数外,还有两个unsafe函数:wcslen()和wstrcpy(),分别用于取字符串长度和按指针拷贝字符串。
Concat两个字符串的实现比较简单:首先判断入参是否为null或空串,然后申请一个长度为两个字符串长度之和的新字符串,再把两个字符串依次填进正确的位置。
注意:函数中的FastAllocateString()函数是一个CLR内部函数。
/// <summary>
/// Concatenates two strings.
/// </summary>
/// <param name="str0">The first string; null is treated like an empty string.</param>
/// <param name="str1">The second string; null is treated like an empty string.</param>
/// <returns>
/// <c>string.Empty</c> when both inputs are null or empty, the other input unchanged when
/// exactly one of them is null or empty, otherwise a newly allocated string holding
/// <paramref name="str0"/> followed by <paramref name="str1"/>.
/// </returns>
public static string Concat(string str0, string str1)
{
    bool firstIsBlank = string.IsNullOrEmpty(str0);
    bool secondIsBlank = string.IsNullOrEmpty(str1);

    if (firstIsBlank)
    {
        return secondIsBlank ? string.Empty : str1;
    }

    if (secondIsBlank)
    {
        return str0;
    }

    // Both parts are non-empty: allocate the result once and copy each part into place.
    int firstLength = str0.Length;
    string result = string.FastAllocateString(firstLength + str1.Length);
    string.FillStringChecked(result, 0, str0);
    string.FillStringChecked(result, firstLength, str1);
    return result;
}
而FillStringChecked填充字符串的实现是依赖wstrcpy的:
/// <summary>
/// Copies all characters of <paramref name="src"/> into <paramref name="dest"/>,
/// starting at character index <paramref name="destPos"/>.
/// </summary>
/// <param name="dest">String whose character storage is written to.</param>
/// <param name="destPos">Character offset in <paramref name="dest"/> at which writing starts.</param>
/// <param name="src">String whose characters are copied.</param>
/// <exception cref="IndexOutOfRangeException">
/// <paramref name="src"/> is longer than the space left in <paramref name="dest"/> after <paramref name="destPos"/>.
/// </exception>
private static unsafe void FillStringChecked(string dest, int destPos, string src)
{
    int sourceLength = src.Length;
    int room = dest.Length - destPos;
    if (sourceLength > room)
    {
        throw new IndexOutOfRangeException();
    }

    // Pin both strings so raw character pointers stay valid for the duration of the copy.
    fixed (char* destStart = &dest.m_firstChar)
    fixed (char* srcStart = &src.m_firstChar)
    {
        string.wstrcpy(destStart + destPos, srcStart, sourceLength);
    }
}
而wstrcpy的实现又是依赖Buffer类的Memcpy函数:wstrcpy()函数的目的是将指定长度的字符从A复制到B,而入参是字符串的指针char*。
/// <summary>
/// Copies <paramref name="charCount"/> characters from <paramref name="smem"/> to <paramref name="dmem"/>.
/// </summary>
/// <param name="dmem">Destination character pointer.</param>
/// <param name="smem">Source character pointer.</param>
/// <param name="charCount">Number of characters (not bytes) to copy.</param>
internal static unsafe void wstrcpy(char* dmem, char* smem, int charCount)
{
    // Buffer.Memcpy takes a byte count; the character count is doubled to get it.
    int byteCount = charCount * 2;
    byte* target = (byte*) dmem;
    byte* source = (byte*) smem;
    Buffer.Memcpy(target, source, byteCount);
}
注意这里第3个参数乘以2:char占2个字节,而Memcpy是按字节数拷贝的。Memcpy的实现有些复杂,按照待拷贝的字节数做了相应的优化。
/// <summary>
/// Copies <paramref name="len"/> bytes from <paramref name="src"/> to <paramref name="dest"/>.
/// </summary>
/// <remarks>
/// Lengths 0 through 16 are handled by fully unrolled cases that use the widest stores
/// (int, then short, then byte) that fit. Lengths of 512 or more are forwarded to
/// Buffer._Memcpy. Everything in between is copied inline: dest is first brought to a
/// 4-byte boundary, then 16-byte chunks are moved, followed by an 8/4/2/1-byte tail.
/// No null, bounds, or overlap checks are performed in this method.
/// NOTE(review): callers are presumably expected to pass a non-negative len — confirm.
/// </remarks>
/// <param name="dest">Destination pointer.</param>
/// <param name="src">Source pointer.</param>
/// <param name="len">Number of bytes to copy.</param>
internal static unsafe void Memcpy(byte* dest, byte* src, int len)
{
switch (len)
{
// Small sizes: each case is a straight-line sequence of 4-, 2- and 1-byte moves.
case 0:
break;
case 1:
*dest = *src;
break;
case 2:
*(short*) dest = *(short*) src;
break;
case 3:
*(short*) dest = *(short*) src;
dest[2] = src[2];
break;
case 4:
*(int*) dest = *(int*) src;
break;
case 5:
*(int*) dest = *(int*) src;
dest[4] = src[4];
break;
case 6:
*(int*) dest = *(int*) src;
*(short*) (dest + 4) = *(short*) (src + 4);
break;
case 7:
*(int*) dest = *(int*) src;
*(short*) (dest + 4) = *(short*) (src + 4);
dest[6] = src[6];
break;
case 8:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
break;
case 9:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
dest[8] = src[8];
break;
case 10:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(short*) (dest + 8) = *(short*) (src + 8);
break;
case 11:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(short*) (dest + 8) = *(short*) (src + 8);
dest[10] = src[10];
break;
case 12:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(int*) (dest + 8) = *(int*) (src + 8);
break;
case 13:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(int*) (dest + 8) = *(int*) (src + 8);
dest[12] = src[12];
break;
case 14:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(int*) (dest + 8) = *(int*) (src + 8);
*(short*) (dest + 12) = *(short*) (src + 12);
break;
case 15:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(int*) (dest + 8) = *(int*) (src + 8);
*(short*) (dest + 12) = *(short*) (src + 12);
dest[14] = src[14];
break;
case 16:
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(int*) (dest + 8) = *(int*) (src + 8);
*(int*) (dest + 12) = *(int*) (src + 12);
break;
default:
// Large copies are delegated to Buffer._Memcpy.
if (len >= 512)
{
Buffer._Memcpy(dest, src, len);
break;
}
else
{
// dest is not on a 4-byte boundary: copy 1 to 3 leading bytes so the bulk loop below
// writes through a 4-byte-aligned dest. Only dest's address is examined; src is simply
// advanced by the same amount.
if (((int) dest & 3) != 0)
{
// Odd address: move a single byte first.
if (((int) dest & 1) != 0)
{
*dest = *src;
++src;
++dest;
--len;
// If that byte already brought dest to a 4-byte boundary, skip the 2-byte step.
if (((int) dest & 2) == 0)
goto label_25;
}
// dest is 2-byte aligned but not 4-byte aligned here: move one 16-bit unit.
*(short*) dest = *(short*) src;
src += 2;
dest += 2;
len -= 2;
}
label_25:
// Bulk phase: len / 16 iterations of four 4-byte moves. len itself is not decremented,
// so its low four bits still describe the remaining tail after the loop.
for (int index = len / 16; index > 0; --index)
{
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
*(int*) (dest + 8) = *(int*) (src + 8);
*(int*) (dest + 12) = *(int*) (src + 12);
dest += 16;
src += 16;
}
// Tail phase: bits 3, 2, 1 and 0 of len select an 8-, 4-, 2- and 1-byte move respectively.
if ((len & 8) != 0)
{
*(int*) dest = *(int*) src;
*(int*) (dest + 4) = *(int*) (src + 4);
dest += 8;
src += 8;
}
if ((len & 4) != 0)
{
*(int*) dest = *(int*) src;
dest += 4;
src += 4;
}
if ((len & 2) != 0)
{
*(short*) dest = *(short*) src;
dest += 2;
src += 2;
}
// No odd trailing byte: done.
if ((len & 1) == 0)
break;
*dest++ = *src++;
break;
}
}
}
判断字符串是否值相等的实现如下:先判断自身和入参是否为null,接下来判断两者引用的是否是同一个对象(如果是同一个对象,内容必然相等,可以直接返回true,省去逐字符比较),再判断长度是否相等,最后判断值是否相等。
/// <summary>
/// Determines whether this string and <paramref name="value"/> have the same value.
/// </summary>
/// <param name="value">The string to compare with this instance; may be null.</param>
/// <returns>
/// <c>false</c> when <paramref name="value"/> is null or the lengths differ; <c>true</c> when both
/// references point to the same object; otherwise the result of <c>EqualsHelper</c>.
/// </returns>
/// <exception cref="NullReferenceException">This instance is null.</exception>
public bool Equals(string value)
{
    if (this == null)
    {
        throw new NullReferenceException();
    }

    if (value == null)
    {
        return false;
    }

    // Same object means same content; no character comparison is needed.
    bool sameInstance = object.ReferenceEquals((object) this, (object) value);

    // Different lengths can never be equal, so the character comparison runs only for equal lengths.
    return sameInstance || (this.Length == value.Length && string.EqualsHelper(this, value));
}
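EqualsHelper()判断两个字符串的值是否相等的实现如下:看起来有些复杂,其实是把字符指针强转成int*,每次比较两个char(4个字节);第一个循环做了展开,每轮比较10个字符,第二个循环每次比较2个字符来处理剩余部分。长度为奇数时最后一次比较会多读一个char,这依赖于CLR中字符串末尾总有一个不计入Length的'\0'结束符。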
/// <summary>
/// Compares the characters of two strings, two characters (one 32-bit read) at a time.
/// </summary>
/// <remarks>
/// Only <paramref name="strA"/>'s length is consulted; the visible caller checks that both
/// lengths match before calling.
/// NOTE(review): for odd lengths the final 32-bit read extends one char past the counted
/// characters — presumably relying on the runtime's string terminator; confirm.
/// </remarks>
/// <param name="strA">First string.</param>
/// <param name="strB">Second string.</param>
/// <returns><c>true</c> when every compared 32-bit unit matches; otherwise <c>false</c>.</returns>
private static unsafe bool EqualsHelper(string strA, string strB)
{
    int remaining = strA.Length;
    fixed (char* pinnedA = &strA.m_firstChar)
    fixed (char* pinnedB = &strB.m_firstChar)
    {
        char* a = pinnedA;
        char* b = pinnedB;

        // Unrolled phase: ten characters (five 32-bit reads) per iteration.
        for (; remaining >= 10; remaining -= 10)
        {
            if (*(int*) a != *(int*) b)
            {
                return false;
            }
            if (*(int*) (a + 2) != *(int*) (b + 2))
            {
                return false;
            }
            if (*(int*) (a + 4) != *(int*) (b + 4))
            {
                return false;
            }
            if (*(int*) (a + 6) != *(int*) (b + 6))
            {
                return false;
            }
            if (*(int*) (a + 8) != *(int*) (b + 8))
            {
                return false;
            }

            a += 10;
            b += 10;
        }

        // Remainder: two characters per step, stopping at the first mismatch.
        while (remaining > 0)
        {
            if (*(int*) a != *(int*) b)
            {
                break;
            }

            a += 2;
            b += 2;
            remaining -= 2;
        }

        // A mismatch leaves remaining positive; a full match drives it to zero or below.
        return remaining <= 0;
    }
}
Remove函数的实现是这样的,思路是申请一个长度为原字符串长度减去需要移除的长度的新字符串,然后把被移除区间之前和之后的两段字符分别拷贝进去,最后返回新字符串。
/// <summary>
/// Returns a new string in which <paramref name="count"/> characters starting at
/// <paramref name="startIndex"/> have been removed.
/// </summary>
/// <param name="startIndex">Zero-based index of the first character to remove.</param>
/// <param name="count">Number of characters to remove.</param>
/// <returns>
/// <c>string.Empty</c> when nothing remains; otherwise a newly allocated string made of the
/// characters before and after the removed range.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> or <paramref name="count"/> is negative, or
/// <paramref name="count"/> exceeds the characters available from <paramref name="startIndex"/>.
/// </exception>
public unsafe string Remove(int startIndex, int count)
{
    if (startIndex < 0)
    {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_StartIndex"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_NegativeCount"));
    }

    int available = this.Length - startIndex;
    if (count > available)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_IndexCount"));
    }

    int resultLength = this.Length - count;
    if (resultLength == 0)
    {
        return string.Empty;
    }

    string result = string.FastAllocateString(resultLength);
    int tailLength = resultLength - startIndex;
    fixed (char* source = &this.m_firstChar)
    fixed (char* target = &result.m_firstChar)
    {
        // Head: everything before the removed range.
        string.wstrcpy(target, source, startIndex);
        // Tail: everything after the removed range, placed right after the head.
        string.wstrcpy(target + startIndex, source + startIndex + count, tailLength);
    }

    return result;
}
Substring的实现如下(这里列出的是其内部函数InternalSubString):基本上利用wstrcpy复制相应位置的数据到新的字符串。
/// <summary>
/// Creates a new string from <paramref name="length"/> characters of this string,
/// starting at <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// No argument validation is performed here.
/// NOTE(review): callers are presumably expected to validate the range — confirm.
/// </remarks>
/// <param name="startIndex">Zero-based index of the first character to copy.</param>
/// <param name="length">Number of characters to copy.</param>
/// <returns>The newly allocated string.</returns>
private unsafe string InternalSubString(int startIndex, int length)
{
    string result = string.FastAllocateString(length);
    fixed (char* target = &result.m_firstChar)
    fixed (char* source = &this.m_firstChar)
    {
        char* sliceStart = source + startIndex;
        string.wstrcpy(target, sliceStart, length);
    }

    return result;
}
Trim的思路也基本一样,找准新字符串的位置,然后用wstrcpy组建新字符串。
具体不再表。
而重载的运算符 == 其实就是调用静态的string.Equals(a, b)。
/// <summary>
/// Determines whether two strings are equal by delegating to the static <c>string.Equals(a, b)</c>.
/// </summary>
/// <param name="a">The first string to compare.</param>
/// <param name="b">The second string to compare.</param>
/// <returns>The result of <c>string.Equals(a, b)</c>.</returns>
public static bool operator ==(string a, string b)
{
    bool areEqual = string.Equals(a, b);
    return areEqual;
}
以上。