-- 截取中英混合的UTF8字符串 (substring helpers for mixed Chinese/English UTF-8 strings)
--- Extract a substring from a mixed Chinese/English UTF-8 string.
-- startIndex is a 1-based character position; a negative value is counted
-- back from the last character (-1 is the last character).
-- endIndex may be omitted, in which case everything from startIndex to the
-- end of the string is returned. A negative endIndex is normalised the same
-- way as startIndex, and the result is turned into a byte offset by
-- SubStringGetEndIndex.
-- @param str        the string to slice
-- @param startIndex position of the first character to keep
-- @param endIndex   (optional) end position
-- @return the slice produced by string.sub on the resolved byte offsets
function SubStringUTF8(str, startIndex, endIndex)
    if startIndex < 0 then
        startIndex = SubStringGetTotalIndex(str) + startIndex + 1
    end
    local firstByte = SubStringGetTrueIndex(str, startIndex)

    -- No end given: slice through to the end of the string.
    if endIndex == nil then
        return string.sub(str, firstByte)
    end

    if endIndex < 0 then
        endIndex = SubStringGetTotalIndex(str) + endIndex + 1
    end
    local lastByte = SubStringGetEndIndex(str, endIndex)
    return string.sub(str, firstByte, lastByte)
end
--- Count the characters (not bytes) in a mixed Chinese/English UTF-8 string.
-- Walks the string one character at a time, using SubStringGetByteCount to
-- learn how many bytes to skip, until that helper reports 0 (no byte at the
-- current position).
-- @param str the string to measure
-- @return number of characters stepped over; 0 for an empty string
function SubStringGetTotalIndex(str)
    local charCount = 0
    local bytePos = 1
    local width = SubStringGetByteCount(str, bytePos)
    while width ~= 0 do
        charCount = charCount + 1
        bytePos = bytePos + width
        width = SubStringGetByteCount(str, bytePos)
    end
    return charCount
end
--- Byte offset at which the index-th character of str begins.
-- An index of 1 or less yields 1. When index is beyond the last character
-- the offset just past the last decoded character is returned, so
-- string.sub(str, offset) gives "".
-- The walk stops as soon as the string is exhausted, so a very large index
-- (or math.huge) costs no more than one pass over the string.
-- @param str   the string to scan
-- @param index 1-based character position
-- @return 1-based byte offset of that character's first byte
function SubStringGetTrueIndex(str, index)
    local bytePos = 1
    local charIndex = 1
    while charIndex < index do
        local width = SubStringGetByteCount(str, bytePos)
        if width == 0 then
            -- Ran off the end of the string: nothing more to skip.
            break
        end
        bytePos = bytePos + width
        charIndex = charIndex + 1
    end
    return bytePos
end
--- Byte offset of the last byte of the index-th character of str.
-- index is a character position, matching how SubStringUTF8 normalises
-- negative end positions with the character count. Characters are counted
-- one by one while their byte widths are summed, so multi-byte characters
-- are never split and the offset lines up with the requested character.
-- Returns 0 when index is 0 or less. When index exceeds the number of
-- characters, the offset covering every decoded character is returned.
-- @param str   the string to scan
-- @param index 1-based character position of the last character to include
-- @return byte offset suitable as the end argument of string.sub
function SubStringGetEndIndex(str, index)
    local charCount = 0
    local endByte = 0
    local bytePos = 1
    while charCount < index do
        local width = SubStringGetByteCount(str, bytePos)
        if width == 0 then
            -- End of string reached before index characters were seen.
            break
        end
        bytePos = bytePos + width
        endByte = endByte + width
        charCount = charCount + 1
    end
    return endByte
end
--- Number of bytes occupied by the UTF-8 character starting at byte `index`.
-- The width is derived from the lead byte:
--   0x00-0x7F -> 1 (ASCII)
--   0xC0-0xDF -> 2
--   0xE0-0xEF -> 3 (includes common Chinese characters)
--   0xF0-0xF7 -> 4
-- Any other byte (a continuation byte 0x80-0xBF, or 0xF8-0xFF which never
-- appears in UTF-8) is treated as a single unit, so a scan that starts in
-- the middle of a character or hits malformed data advances one byte at a
-- time instead of skipping over the next real lead byte.
-- @param str   the string to inspect
-- @param index 1-based byte offset
-- @return 0 when there is no byte at index, otherwise 1 to 4
function SubStringGetByteCount(str, index)
    local lead = string.byte(str, index)
    if lead == nil then
        return 0
    end
    if lead >= 240 and lead <= 247 then
        return 4
    elseif lead >= 224 and lead <= 239 then
        return 3
    elseif lead >= 192 and lead <= 223 then
        return 2
    end
    return 1
end