标题:常见字符串处理代码示例
测试环境:boost 1.55、boost 1.57
注意:部分代码依赖 Windows API
[1]删除字符串首尾的指定字符
#include <boost/algorithm/string.hpp>
...
boost::trim_if(vecRec[0], boost::is_any_of("\" \n\r\t'"));
[2]分割字符串
#include <boost/algorithm/string/split.hpp>
std::vector<std::string> vecRec;
boost::split(vecRec, vecDst[i], boost::is_any_of(":"));
[3]字符替换
x字符替换为y。
std::replace( s.begin(), s.end(), 'x', 'y');
头文件
#pragma once
#include <string>
#include <vector>
#include <map>
// String utilities: tokenizing, "key=value" parsing, conversions between the
// ANSI code page / UTF-8 / wide strings, substring replacement and URL encoding.
namespace StringHelper
{
// Tokenizes s, treating every character contained in delim as a separator
// (the implementation searches with find_first_of), and appends to ret.
void SplitStr(const std::string& s, std::string& delim,std::vector<std::string> &ret);
// Parses ';'-separated "key=value" entries from strSrc into mapKeyValue.
// Entries that do not split into exactly two parts at '=' are ignored.
void Str2Map(const std::string strSrc, std::map<std::string, std::string> &mapKeyValue);
// Converts UTF-8 text to a multibyte string via s2ws_UTF8ToGBK + ws2s_gbk.
// Returns an empty string if either step throws.
std::string UTF8ToGBK(const std::string &unicode);
// Converts text in the system ANSI code page (CP_ACP) to UTF-8.
std::string GBKToUTF8(const std::string& gbk);
// Decodes UTF-8 into a wide string. The implementation also sets the global
// C++ locale to "Chinese-simplified" as a side effect.
std::wstring s2ws_UTF8ToGBK(std::string sUTF8);
// Widens a string in the system ANSI code page (CP_ACP).
std::wstring s2ws_gbk(const std::string& s);
// Narrows a wide string with wcstombs while the C locale is temporarily "chs".
std::string ws2s_gbk(const std::wstring& s);
// Replaces, in place, every occurrence of strsrc inside strBig with strdst.
void string_replace( std::wstring &strBig, const std::wstring &strsrc, const std::wstring &strdst );
// URL-encodes strSrc (converted from CP_ACP to UTF-8 first) with upper-case hex digits.
std::string encodeURI(std::string strSrc);
// Copies the C string at p into a std::string and releases p with delete.
std::string delHeapMemory(char *p);
};
实现文件
#include "StringHelper.h"
#include <Windows.h>
#include <algorithm>
#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>
#include <locale>
#include <codecvt>
#include <sstream>
using namespace std;
/*
VC++ 2008 SP1提供了个编译选项,可以默认就构造出UTF8的string。
#pragma execution_character_set("utf-8")
另外C++11定义了u8,类似L
std::string nstr = u8"123,我是谁?";
但VS2010SP1并不支持。
*/
namespace StringHelper
{
// Splits `s` into fields and appends them, in order, to `ret`.
//
// Every character contained in `delim` acts as a separator. Empty fields
// (adjacent separators, or a leading/trailing separator) are kept, so
// "a::b" yields "a", "", "b" and "a:" yields "a", "".
//
// s     - text to split; an empty string appends nothing.
// delim - set of separator characters; if it is empty, or none of its
//         characters occurs in `s`, the whole of `s` is appended as one field.
// ret   - receives the fields; existing elements are left untouched.
void SplitStr(const std::string& s, std::string& delim,std::vector<std::string> &ret)
{
    if (s.empty())
    {
        return;
    }

    std::string::size_type fieldStart = 0;
    for (;;)
    {
        const std::string::size_type sep = s.find_first_of(delim, fieldStart);
        if (sep == std::string::npos)
        {
            // Last field: everything after the final separator.
            ret.push_back(s.substr(fieldStart));
            return;
        }
        ret.push_back(s.substr(fieldStart, sep - fieldStart));
        fieldStart = sep + 1;
    }
}//end func
// Parses a ';'-separated list of "key=value" entries into `mapKeyValue`.
//
// An entry is accepted only when it contains exactly one '='. Leading and
// trailing spaces are stripped from the key; the value is stored verbatim.
// Entries without '=', with more than one '=', or whose key is empty after
// trimming are skipped. Existing map entries with the same key are overwritten.
//
// strSrc      - text such as "a=1; b=2".
// mapKeyValue - receives the parsed pairs.
void Str2Map( const std::string strSrc, std::map<std::string, std::string> &mapKeyValue )
{
    const std::string::size_type npos = std::string::npos;

    std::string::size_type entryStart = 0;
    for (;;)
    {
        const std::string::size_type entryEnd = strSrc.find(';', entryStart);
        const std::string entry = (entryEnd == npos)
            ? strSrc.substr(entryStart)
            : strSrc.substr(entryStart, entryEnd - entryStart);

        const std::string::size_type eq = entry.find('=');
        // Exactly one '=' means the entry splits into exactly two parts.
        if (eq != npos && entry.find('=', eq + 1) == npos)
        {
            const std::string rawKey = entry.substr(0, eq);
            const std::string::size_type first = rawKey.find_first_not_of(' ');
            if (first != npos)
            {
                // substr takes a COUNT, so convert the last index into a length.
                const std::string::size_type last = rawKey.find_last_not_of(' ');
                mapKeyValue[rawKey.substr(first, last - first + 1)] = entry.substr(eq + 1);
            }
        }//end if

        if (entryEnd == npos)
        {
            break;
        }
        entryStart = entryEnd + 1;
    }//end for
}
// Decodes the UTF-8 bytes in `sUTF8` into a wide string.
//
// Side effect: after decoding, the global C++ locale is replaced with
// "Chinese-simplified". Constructing that locale throws if the name is not
// known to the runtime; from_bytes throws on malformed UTF-8.
// NOTE(review): the locale switch does not influence the returned value;
// it is kept because other code may depend on it -- confirm before removing.
std::wstring s2ws_UTF8ToGBK(std::string sUTF8)
{
    std::wstring_convert<std::codecvt_utf8<wchar_t>> utf8Decoder;
    std::wstring decoded = utf8Decoder.from_bytes(sUTF8);

    std::locale::global(std::locale("Chinese-simplified"));

    return decoded;
}
// Converts UTF-8 text to a multibyte string by decoding it to a wide string
// (s2ws_UTF8ToGBK) and narrowing that result (ws2s_gbk).
//
// Best effort: any exception raised by either step is swallowed and an
// empty string is returned instead.
std::string UTF8ToGBK( const std::string &strUTF8 )
{
    std::string converted;
    try
    {
        converted = ws2s_gbk(s2ws_UTF8ToGBK(strUTF8));
    }
    catch (...)
    {
        // Conversion failed; report it as an empty result.
        return "";
    }
    return converted;
}
// Converts text encoded in the system ANSI code page (CP_ACP) to UTF-8.
//
// The input is handed to the Win32 API as a NUL-terminated string, so the
// conversion stops at the first NUL byte. Returns an empty string when either
// Win32 conversion call reports failure.
std::string GBKToUTF8(const std::string& gbk)
{
    // Step 1: ANSI -> UTF-16. With a source length of -1 the reported size
    // includes the terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gbk.c_str(), -1, nullptr, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, gbk.c_str(), -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Step 2: UTF-16 -> UTF-8, again sized including the terminator.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0)
    {
        return std::string();
    }
    std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, nullptr, nullptr) <= 0)
    {
        return std::string();
    }

    // Drop the terminator that the API wrote into the buffer.
    utf8.resize(static_cast<std::string::size_type>(utf8Len) - 1);
    return utf8;
}
// Widens a string encoded in the system ANSI code page (CP_ACP).
//
// The source is converted together with its terminating NUL and the result
// ends at the first NUL wide character. Returns an empty wide string when the
// Win32 conversion reports failure.
std::wstring s2ws_gbk( const std::string& s )
{
    // +1 so the terminator is converted too and the buffer is NUL-terminated.
    const int srcLen = static_cast<int>(s.length()) + 1;

    const int wideLen = MultiByteToWideChar(CP_ACP, 0, s.c_str(), srcLen, nullptr, 0);
    if (wideLen <= 0)
    {
        return std::wstring();
    }

    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, s.c_str(), srcLen, &wide[0], wideLen) <= 0)
    {
        return std::wstring();
    }

    // Re-read as a C string so the copied terminator is not part of the result.
    return std::wstring(wide.c_str());
}
// Narrows `ws` to a multibyte string using wcstombs while the C locale is
// temporarily switched to "chs"; the previous locale is restored afterwards.
//
// Returns an empty string if wcstombs reports an unconvertible character.
// setlocale changes process-wide state, so concurrent callers can observe the
// temporary locale.
std::string ws2s_gbk(const std::wstring& ws)
{
    // Allocate before touching the locale so an allocation failure cannot
    // leave the process in the temporary locale.
    // Two bytes per wide character plus one byte that always stays '\0'.
    std::vector<char> narrow(2 * ws.size() + 1, '\0');

    const char* const active = setlocale(LC_ALL, NULL);
    const std::string savedLocale = (active != NULL) ? active : "C";

    setlocale(LC_ALL, "chs");
    // Limit excludes the final byte: wcstombs omits the terminator when it
    // fills the whole limit, and the result is read back as a C string.
    const size_t written = wcstombs(narrow.data(), ws.c_str(), narrow.size() - 1);
    setlocale(LC_ALL, savedLocale.c_str());

    if (written == static_cast<size_t>(-1))
    {
        return std::string();
    }
    return std::string(narrow.data());
}
// Replaces, in place, every occurrence of `strsrc` inside `strBig` with `strdst`.
//
// The scan resumes after each inserted replacement, so text introduced by
// `strdst` is never matched again (replacing "a" with "aa" terminates).
// An empty `strsrc` leaves `strBig` unchanged: it matches at every position
// and would otherwise never terminate.
void string_replace( std::wstring &strBig, const std::wstring &strsrc, const std::wstring &strdst )
{
    if (strsrc.empty())
    {
        return;
    }

    const std::wstring::size_type srclen = strsrc.size();
    const std::wstring::size_type dstlen = strdst.size();

    std::wstring::size_type pos = strBig.find(strsrc);
    while (pos != std::wstring::npos)
    {
        strBig.replace(pos, srclen, strdst);
        pos = strBig.find(strsrc, pos + dstlen);
    }
}
// Copies the NUL-terminated string at `p` into a std::string and then frees `p`.
//
// p - heap buffer holding a C string; may be null, in which case an empty
//     string is returned and nothing is freed.
// NOTE(review): a C string is a char array, so the buffer is released with
// delete[]; this assumes callers allocate it with new char[n] -- confirm.
std::string delHeapMemory( char *p )
{
    if (p == nullptr)
    {
        return std::string();
    }

    std::string copy(p);
    delete[] p;
    return copy;
}
// URL-encodes the NUL-terminated string `szSrc` into `pBuf`.
//
// The source is interpreted in the system ANSI code page (CP_ACP), converted
// to UTF-8, and then encoded byte by byte:
//   - ASCII letters, ASCII digits, '-', '.' and '~' are copied unchanged;
//   - a space becomes '+';
//   - every other byte (including '_') becomes "%XX".
//
// szSrc      - source text; must not be NULL.
// pBuf       - destination buffer; must not be NULL. Always NUL-terminated on
//              return once the arguments have been validated.
// cbBufLen   - size of pBuf in bytes, including room for the terminator.
// bUpperCase - TRUE for hex digits A-F, FALSE for a-f.
//
// Returns FALSE for invalid arguments, allocation failure or a failed
// code-page conversion (pBuf then holds an empty string). Returns TRUE
// otherwise, including when the output stopped early because pBuf was too
// small; a "%XX" triple is never split.
BOOL UrlEncode(const char* szSrc, char* pBuf, int cbBufLen, BOOL bUpperCase)
{
    if (szSrc == NULL || pBuf == NULL || cbBufLen <= 0)
        return FALSE;

    // From here on every exit leaves a valid (possibly empty) C string.
    pBuf[0] = '\0';

    const size_t srcLen = strlen(szSrc);
    if (srcLen == 0)
        return TRUE;

    // The Win32 conversion APIs take int lengths; reject sources that do not fit.
    const int cbSrc = static_cast<int>(srcLen);
    if (cbSrc <= 0 || static_cast<size_t>(cbSrc) != srcLen)
        return FALSE;

    // Step 1: ANSI code page -> UTF-16.
    const int cchWideChar = MultiByteToWideChar(CP_ACP, 0, szSrc, cbSrc, NULL, 0);
    if (cchWideChar <= 0)
        return FALSE;
    LPWSTR pUnicode = static_cast<LPWSTR>(malloc((cchWideChar + 1) * sizeof(WCHAR)));
    if (pUnicode == NULL)
        return FALSE;
    if (MultiByteToWideChar(CP_ACP, 0, szSrc, cbSrc, pUnicode, cchWideChar + 1) <= 0)
    {
        free(pUnicode);
        return FALSE;
    }

    // Step 2: UTF-16 -> UTF-8.
    const int cbUTF8 = WideCharToMultiByte(CP_UTF8, 0, pUnicode, cchWideChar, NULL, 0, NULL, NULL);
    if (cbUTF8 <= 0)
    {
        free(pUnicode);
        return FALSE;
    }
    LPSTR pUTF8 = static_cast<LPSTR>(malloc((cbUTF8 + 1) * sizeof(CHAR)));
    if (pUTF8 == NULL)
    {
        free(pUnicode);
        return FALSE;
    }
    const int cbWritten = WideCharToMultiByte(CP_UTF8, 0, pUnicode, cchWideChar, pUTF8, cbUTF8 + 1, NULL, NULL);
    free(pUnicode); // the wide copy is no longer needed
    if (cbWritten <= 0)
    {
        free(pUTF8);
        return FALSE;
    }
    // An explicit source length means the API does not append a terminator.
    pUTF8[cbUTF8] = '\0';

    // Step 3: encode the UTF-8 bytes.
    const char hexAlphaBase = bUpperCase ? 'A' : 'a';
    const unsigned char *pSrc = reinterpret_cast<const unsigned char*>(pUTF8);
    unsigned char *pDest = reinterpret_cast<unsigned char*>(pBuf);
    int cbDest = 0; // bytes written so far, terminator excluded

    while (*pSrc && cbDest < cbBufLen - 1)
    {
        const unsigned char c = *pSrc;

        // Explicit ASCII ranges: isalpha/isdigit depend on the current C
        // locale and could let non-ASCII bytes through unencoded.
        const bool isAsciiAlnum = (c >= 'A' && c <= 'Z')
                               || (c >= 'a' && c <= 'z')
                               || (c >= '0' && c <= '9');

        if (isAsciiAlnum || c == '-' || c == '.' || c == '~')
        {
            *pDest++ = c;
            ++cbDest;
        }
        else if (c == ' ')
        {
            *pDest++ = '+';
            ++cbDest;
        }
        else
        {
            // Stop rather than emit a partial "%XX" triple.
            if (cbDest + 3 > cbBufLen - 1)
                break;

            const unsigned char hi = static_cast<unsigned char>(c >> 4);
            const unsigned char lo = static_cast<unsigned char>(c & 0xF);
            pDest[0] = '%';
            pDest[1] = static_cast<unsigned char>(hi >= 10 ? hi - 10 + hexAlphaBase : hi + '0');
            pDest[2] = static_cast<unsigned char>(lo >= 10 ? lo - 10 + hexAlphaBase : lo + '0');
            pDest += 3;
            cbDest += 3;
        }
        ++pSrc;
    }

    // null-terminator
    *pDest = '\0';
    free(pUTF8);
    return TRUE;
}
string encodeURI(string strSrc)
{
char *buf[512];
memset(buf, 0, sizeof(buf));
UrlEncode(strSrc.c_str(), (char*)(buf), sizeof(buf), TRUE);
string dst((char *)buf);
return dst;
}
};
字符串转大写(或小写)
http://www.cnblogs.com/mmix2009/archive/2013/07/19/3200150.html