在 C# 中截取字符串原本很简单,使用 string.Substring() 方法即可。这在纯英文的情况下没有问题,但如果字符串中包含中文(或者全部是中文),就会出现问题:
string.Substring() 把一个中文字符也按一个字符计算,导致在实际应用中,中英文混合的字符串截取后,显示出来的“长度”并不一致。
所以就有了下面的这个方法。大致原理是将字符串转化为 Char 数组并逐个字符累计长度:如果是中文字符,长度加 2;否则长度加 1。
/// <summary>
/// Truncates a string to a maximum display width, counting every character for which
/// <see cref="IsTwoBytesChar"/> returns true as two columns and every other character as one.
/// </summary>
/// <remarks>
/// The input is HTML-decoded before it is measured and HTML-encoded again before it is
/// returned (this round trip happens whenever <paramref name="maxLength"/> is positive,
/// even if nothing is cut). When the cut would fall in the middle of a run of single-width
/// letters (for example an English word), the partial word and the whitespace in front of
/// it are removed as well, unless that would leave nothing, in which case the hard cut is kept.
/// </remarks>
/// <param name="inputString">The text to shorten. Null or empty input is returned unchanged.</param>
/// <param name="maxLength">
/// Maximum length expressed in double-width characters; the width budget is
/// <c>maxLength * 2</c> columns. Zero or negative values return the input unchanged.
/// </param>
/// <param name="endString">
/// Suffix appended (not HTML-encoded) only when text was actually removed. Null appends nothing.
/// </param>
/// <returns>The HTML-encoded, possibly shortened text, followed by <paramref name="endString"/> if it was cut.</returns>
public static string CutString(string inputString, int maxLength, string endString = "")
{
    if (string.IsNullOrEmpty(inputString) || maxLength <= 0)
    {
        return inputString;
    }

    var decodedInputString = HttpUtility.HtmlDecode(inputString);

    // Computed as long so a very large maxLength cannot overflow into a negative budget.
    var totalLength = 2L * maxLength;

    // Keep leading characters for as long as their accumulated width fits the budget.
    var cutIndex = 0;
    var width = 0L;
    while (cutIndex < decodedInputString.Length)
    {
        var charWidth = IsTwoBytesChar(decodedInputString[cutIndex]) ? 2 : 1;
        if (width + charWidth > totalLength)
        {
            break;
        }

        width += charWidth;
        cutIndex++;
    }

    // Never leave half of a surrogate pair at the end of the kept text.
    if (cutIndex > 0 && cutIndex < decodedInputString.Length
        && char.IsHighSurrogate(decodedInputString[cutIndex - 1])
        && char.IsLowSurrogate(decodedInputString[cutIndex]))
    {
        cutIndex--;
    }

    // If the first dropped character continues a single-width word, back up to the start
    // of that word so the result does not end with a word fragment.
    if (cutIndex < decodedInputString.Length && IsSingleWidthLetter(decodedInputString[cutIndex]))
    {
        var wordStart = cutIndex;
        while (wordStart > 0 && IsSingleWidthLetter(decodedInputString[wordStart - 1]))
        {
            wordStart--;
        }

        // Also drop the whitespace that separated the removed fragment from the previous text.
        var trimmedEnd = wordStart;
        while (trimmedEnd > 0 && char.IsWhiteSpace(decodedInputString[trimmedEnd - 1]))
        {
            trimmedEnd--;
        }

        // Only apply the word-boundary cut when a fragment exists and something remains
        // in front of it; otherwise keep the hard cut instead of returning empty text.
        if (wordStart < cutIndex && trimmedEnd > 0)
        {
            cutIndex = trimmedEnd;
        }
    }

    var isCut = cutIndex < decodedInputString.Length;
    var result = HttpUtility.HtmlEncode(decodedInputString.Substring(0, cutIndex));
    if (isCut && endString != null)
    {
        result += endString;
    }

    return result;
}

/// <summary>
/// Reports whether a character is a letter that counts as a single column
/// (i.e. a letter for which <see cref="IsTwoBytesChar"/> is false).
/// </summary>
private static bool IsSingleWidthLetter(char chr)
{
    return char.IsLetter(chr) && !IsTwoBytesChar(chr);
}
/// <summary>
/// Reports whether a character occupies exactly two bytes when encoded as GB2312.
/// The original note describes this as the check for "is this a Chinese character".
/// </summary>
/// <remarks>
/// NOTE(review): characters that GB2312 cannot represent are presumably counted by whatever
/// the encoding's fallback produces (typically a single replacement byte), so they would
/// report false here. Confirm if non-GB2312 text matters to callers.
/// </remarks>
/// <param name="chr">The character to measure.</param>
/// <returns>
/// <c>true</c> when the GB2312 byte count of <paramref name="chr"/> is 2; otherwise <c>false</c>.
/// </returns>
private static bool IsTwoBytesChar(char chr)
{
    var gb2312 = Encoding.GetEncoding("gb2312");
    char[] singleCharBuffer = { chr };
    var encodedByteCount = gb2312.GetByteCount(singleCharBuffer);
    return encodedByteCount == 2;
}