最近要支持字符串的特殊处理,提供的处理方案仅有C#版的实现,需要实现C++版本。这里记录实现过程中替换的一些C#接口,以备后查。
#include <cctype>
#include <codecvt>
#include <cstddef>
#include <cwctype>
#include <iostream>
#include <locale>
#include <sstream>
#include <string>
#include <vector>
// UTF-16 code unit type; plays the role of C#'s `char` in the helpers below.
using my_sch_ar = char16_t;

// C++ stand-ins for a handful of C# string/char APIs, operating on UTF-16 text.
namespace mytool {

// Shared tokenizer for both splitStr overloads: cuts `input` at every code unit
// that appears in `delimiters` and returns the non-empty pieces in order.
// With an empty `delimiters` nothing can match, so a non-empty `input` comes
// back as a single token.
static std::vector<std::u16string> splitOnAnyOf(const std::u16string& input,
                                                const std::u16string& delimiters) {
    std::vector<std::u16string> tokens;
    std::size_t tokenStart = 0;
    while (tokenStart < input.size()) {
        const std::size_t hit = input.find_first_of(delimiters, tokenStart);
        const std::size_t tokenEnd = (hit == std::u16string::npos) ? input.size() : hit;
        // Adjacent, leading and trailing delimiters would produce empty pieces; skip them.
        if (tokenEnd > tokenStart) {
            tokens.push_back(input.substr(tokenStart, tokenEnd - tokenStart));
        }
        tokenStart = tokenEnd + 1;
    }
    return tokens;
}

// Splits `input` on a single delimiter code unit.
//
// @param input      text to split
// @param delimiter  code unit that separates tokens
// @return the tokens in order of appearance; empty tokens are dropped, so an
//         empty or delimiter-only input yields an empty vector.
static std::vector<std::u16string> splitStr(const std::u16string& input, my_sch_ar delimiter) {
    return splitOnAnyOf(input, std::u16string(1, delimiter));
}

// Counterpart of C# `String[] Split(params char[] separator)`.
//
// Splits `input` wherever any code unit from `delimiterVec` occurs, in one pass
// over the text. (Splitting by one delimiter after another and using "result is
// empty" as the first-round marker re-split the original input whenever an
// earlier delimiter had consumed everything.)
//
// @param input         text to split
// @param delimiterVec  set of delimiter code units; when empty, a non-empty
//                      `input` is returned as a single token
// @return the tokens in order of appearance, without empty tokens.
// NOTE: unlike the C# overload, empty entries are never returned.
static std::vector<std::u16string> splitStr(const std::u16string& input,
                                            const std::vector<my_sch_ar>& delimiterVec) {
    const std::u16string delimiters(delimiterVec.begin(), delimiterVec.end());
    return splitOnAnyOf(input, delimiters);
}

// Reports whether `childStr` occurs anywhere inside `content`.
//
// @return true when `childStr` is found, including a match at the very end of
//         `content` and the case where both strings are equal; an empty
//         `childStr` is always found.
static bool containsStr(const std::u16string& content, const std::u16string& childStr)
{
    return content.find(childStr) != std::u16string::npos;
}

// Replaces every occurrence of `oldStr` in `inStr` with `newStr`, in place.
//
// The text is scanned once from left to right and inserted replacement text is
// never rescanned, so the call terminates even when `newStr` contains `oldStr`.
// An empty `oldStr` leaves `inStr` untouched.
//
// @param inStr   string modified in place
// @param oldStr  text to look for (taken by value, so it may safely be a copy of `inStr`)
// @param newStr  text substituted for each match
static void replaceStr(std::u16string& inStr, std::u16string oldStr, std::u16string newStr)
{
    if (oldStr.empty()) {
        return;  // an empty pattern matches everywhere and could never finish
    }
    std::size_t pos = inStr.find(oldStr);
    while (pos != std::u16string::npos) {
        inStr.replace(pos, oldStr.size(), newStr);
        // Resume the search just past the text that was inserted.
        pos = inStr.find(oldStr, pos + newStr.size());
    }
}

// Copies the code units of `inStr` into a vector, preserving order.
static std::vector<my_sch_ar> strToCharVec(const std::u16string& inStr)
{
    return std::vector<my_sch_ar>(inStr.begin(), inStr.end());
}

// Stand-in for C# char.IsNumber; only the ASCII digits '0'..'9' are recognised.
static bool IsNumber(my_sch_ar c) {
    return c >= u'0' && c <= u'9';
}

// Stand-in for C# char.IsLower; only ASCII 'a'..'z' are recognised.
static bool IsLower(my_sch_ar c) {
    return c >= u'a' && c <= u'z';
}

// Stand-in for C# char.IsUpper; only ASCII 'A'..'Z' are recognised.
static bool IsUpper(my_sch_ar c) {
    return c >= u'A' && c <= u'Z';
}

// Stand-in for C# char.IsPunctuation, delegating to std::iswpunct.
// The classification therefore comes from the C library and its current locale.
// NOTE(review): iswpunct presumably also accepts ASCII symbols such as '$' or
// '+', which .NET reports as symbols rather than punctuation - confirm that
// callers are fine with the overlap with IsSymbol.
static bool IsPunctuation(my_sch_ar c) {
    return std::iswpunct(static_cast<std::wint_t>(c)) != 0;
}

// Stand-in for C# char.IsSymbol: true when `c` is one of a fixed list of ASCII
// characters. The list is hand-picked; it is not derived from Unicode categories.
static bool IsSymbol(my_sch_ar c) {
    static constexpr my_sch_ar kSymbols[] = u"!@#$%^&*()_+-=[]{}|:;,.<>/?\\~`";
    // The array carries a terminating NUL that must not count as a symbol.
    constexpr std::size_t kSymbolCount = sizeof(kSymbols) / sizeof(kSymbols[0]) - 1;
    return std::char_traits<my_sch_ar>::find(kSymbols, kSymbolCount, c) != nullptr;
}

// Counterpart of C# Convert.ToString(int value, int toBase) with toBase == 16.
//
// @param number  value to format
// @return lowercase hexadecimal digits without prefix or padding; negative
//         values are rendered from their unsigned bit pattern.
static std::u16string ConvertToString(int number) {
    static constexpr my_sch_ar kHexDigits[] = u"0123456789abcdef";
    unsigned int bits = static_cast<unsigned int>(number);
    std::u16string digits;
    // Emit the least significant nibble first and prepend, so the most
    // significant digit ends up in front; zero still yields one digit.
    do {
        digits.insert(digits.begin(), kHexDigits[bits & 0xFu]);
        bits >>= 4;
    } while (bits != 0);
    return digits;
}

}  // namespace mytool
特别注意:
在C#中,string类型是基于UTF-16编码的。因此,如果参考的处理方案是基于UTF-16的,C#中的string对应到C++中应使用 std::u16string:调用接口时先将std::string(UTF-8)转换成std::u16string,处理结束后再将std::u16string转换回std::string。
C++中UTF-8和UTF-16的相互转换:
#include <codecvt>
// Demonstrates a UTF-8 -> UTF-16 -> UTF-8 round trip with std::wstring_convert.
int main()
{
    // A single converter object serves both directions.
    using Utf8Utf16Converter = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;

    const std::string source = "abcd";
    Utf8Utf16Converter codec;

    // UTF-8 bytes -> UTF-16 code units
    const std::u16string wide = codec.from_bytes(source);

    // UTF-16 code units -> UTF-8 bytes
    const std::string narrow = codec.to_bytes(wide);

    // Reads one character from stdin before returning
    // (presumably to keep a console window open).
    getchar();
    return 0;
}