前几天，需要对输入的 XML 节点数据进行 HTML 转义，否则会导致后续操作失败。
节点内容是第三方的, 有可能输入所有可见字符.
前段时间只对几个常用的 HTML 字符做了转义，结果输入内容中不时会出现没有被处理、却需要转义的字符。
这几天,可以整理工程了。对html串转义操作,进行了封装。以后再也不会出现由于未进行html字符串转义引起的操作失败.
html转义字符集的定义 : http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
工程下载点: testcase_HtmlEscape_2015_0415_1722.zip
// testcase_HtmlEscape.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "HtmlEscapeHelper/HtmlEscapeHelper.h"
/// Console demo for the HTML escape helper.
///
/// Builds a URL-like wide string that contains '&', U+00A9 and U+00AE,
/// prints it, runs it through ns_base::string_convert_escape_character_w()
/// and prints the input next to the escaped output.
///
/// @param argc unused
/// @param argv unused
/// @return always 0
int _tmain(int argc, _TCHAR* argv[])
{
    _tsetlocale(LC_CTYPE, L".936"); //< code page 936, so Chinese text can be written to the console
    ns_base::init_string_convert_escape_character();

    /// build a html string have chars need escape
    std::wstring strHtml = L"http://test.com/index.php&name=";
    strHtml += (WCHAR)169; ///< ©
    strHtml += L"someone";
    strHtml += (WCHAR)174; ///< ®
    // The two characters above are shown as '?' by the console (see the run
    // result below); only the escaped form is printable there.
    _tprintf(L"%s\r\n", strHtml.c_str());

    /// The escaped text is mainly meant to be used as XML node content.
    _tprintf(L"html escape begin:\r\n");
    const std::wstring strHtmlEscape = ns_base::string_convert_escape_character_w(strHtml);
    _tprintf(L"before escape: %s\r\n after escape: %s\r\n",
        strHtml.c_str(),
        strHtmlEscape.c_str());

    ns_base::uninit_string_convert_escape_character();

    // Stay with wide output: stdout has already been used by the wide printf
    // calls above, and mixing narrow and wide output on one stream is not portable.
    _tprintf(L"结束, press any key to quit\r\n");

    /** run result
    http://test.com/index.php&name=?someone?
    html escape begin:
    before escape: http://test.com/index.php&name=?someone?
     after escape: http://test.com/index.php&amp;name=&copy;someone&reg;
    结束, press any key to quit
    */
    getwchar();
    return 0;
}
/// @file HtmlEscapeHelper.h
/// @brief Escapes HTML special characters in strings
#include "stdafx.h"
#ifndef __HTML_ESCAPE_HELPER_H__
#define __HTML_ESCAPE_HELPER_H__
namespace ns_base
{
/// For fast lookup the implementation keeps a large array allocated with new[];
/// its element type is TAG_HTMLESCAPE_TBL_ITEM and only some of the slots hold
/// a valid mapping.
/// Call init_string_convert_escape_character() when the program starts (allocates the array).
/// Call uninit_string_convert_escape_character() when the program exits (deletes the array).
void init_string_convert_escape_character();
void uninit_string_convert_escape_character();
/// HTML-escape a whole string and return the escaped copy.
/// The _w variant works on wide strings; the _a variant converts the narrow
/// input to wide, escapes it and converts the result back to narrow.
std::wstring string_convert_escape_character_w(IN std::wstring strIn);
std::string string_convert_escape_character_a(IN std::string strIn);
/// HTML-escape a whole string.
/// @param strIn          text to escape
/// @param strHtmlEscape  receives the escaped text (previous content is discarded)
/// @return always TRUE
BOOL string2HtmlEscape(IN const std::wstring& strIn, OUT std::wstring& strHtmlEscape);
/// HTML-escape a single character.
/// @param cIn            character to escape
/// @param strHtmlEscape  receives the mapped escape text, or cIn itself when it has no mapping
/// @return always TRUE
BOOL char2HtmlEscape(IN WCHAR cIn, OUT std::wstring& strHtmlEscape);
/// Tells whether a character has an entry in the escape table.
/// @return TRUE when cIn must be replaced by an escape sequence, FALSE otherwise
BOOL IsHtmlEscape(IN WCHAR cIn);
}
#endif // #ifndef __HTML_ESCAPE_HELPER_H__
/// @file HtmlEscapeHelper.cpp
#include "stdafx.h"
#include "HtmlEscapeHelper.h"
#include "StringHelper.h"
#include "constDefine_BaseSupportLayer.h"
/// 放html字符和转义字符串对应关系的结构体
typedef struct _tag_HtmlEscape_Tbl_Item
{
BOOL bValidData; ///< 该数据是否有效
WCHAR cValue; ///< 字符值
std::wstring strHtmlEscape; ///< 对应的转义字符
_tag_HtmlEscape_Tbl_Item()
{
bValidData = FALSE;
cValue = L'\0';
strHtmlEscape = L"";
}
_tag_HtmlEscape_Tbl_Item(WCHAR cValue, WCHAR* pcHtmlEscape)
{
this->bValidData = TRUE;
this->cValue = cValue;
this->strHtmlEscape = (NULL != pcHtmlEscape) ? pcHtmlEscape : L"";
}
}TAG_HTMLESCAPE_TBL_ITEM;
/// Lookup array indexed directly by character value. It is NULL until
/// ns_base::init_string_convert_escape_character() allocates and fills it,
/// and it is released by ns_base::uninit_string_convert_escape_character().
TAG_HTMLESCAPE_TBL_ITEM* g_pArr_tag_htmlescape_tbl_item = NULL;
/// Source of the complete character entity list:
/// http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
///
/// Each entry maps one character value to its named character entity.
/// The second argument must be the entity reference itself ("&name;"), never
/// the character it stands for, otherwise the escape is a no-op.
const int g_iSize_HtmlEscapeTbl = 253;
const TAG_HTMLESCAPE_TBL_ITEM g_HtmlEscapeTbl[g_iSize_HtmlEscapeTbl] =
{
    TAG_HTMLESCAPE_TBL_ITEM(34, L"&quot;"), ///< quot U+0022 (34)
    TAG_HTMLESCAPE_TBL_ITEM(38, L"&amp;"), ///< amp U+0026 (38)
    TAG_HTMLESCAPE_TBL_ITEM(39, L"&apos;"), ///< apos U+0027 (39)
    TAG_HTMLESCAPE_TBL_ITEM(60, L"&lt;"), ///< lt U+003C (60)
    TAG_HTMLESCAPE_TBL_ITEM(62, L"&gt;"), ///< gt U+003E (62)
    TAG_HTMLESCAPE_TBL_ITEM(160, L"&nbsp;"),
    TAG_HTMLESCAPE_TBL_ITEM(161, L"&iexcl;"),
    TAG_HTMLESCAPE_TBL_ITEM(162, L"&cent;"),
    TAG_HTMLESCAPE_TBL_ITEM(163, L"&pound;"),
    TAG_HTMLESCAPE_TBL_ITEM(164, L"&curren;"),
    TAG_HTMLESCAPE_TBL_ITEM(165, L"&yen;"),
    TAG_HTMLESCAPE_TBL_ITEM(166, L"&brvbar;"),
    TAG_HTMLESCAPE_TBL_ITEM(167, L"&sect;"),
    TAG_HTMLESCAPE_TBL_ITEM(168, L"&uml;"),
    TAG_HTMLESCAPE_TBL_ITEM(169, L"&copy;"),
    TAG_HTMLESCAPE_TBL_ITEM(170, L"&ordf;"),
    TAG_HTMLESCAPE_TBL_ITEM(171, L"&laquo;"),
    TAG_HTMLESCAPE_TBL_ITEM(172, L"&not;"),
    TAG_HTMLESCAPE_TBL_ITEM(173, L"&shy;"),
    TAG_HTMLESCAPE_TBL_ITEM(174, L"&reg;"),
    TAG_HTMLESCAPE_TBL_ITEM(175, L"&macr;"),
    TAG_HTMLESCAPE_TBL_ITEM(176, L"&deg;"),
    TAG_HTMLESCAPE_TBL_ITEM(177, L"&plusmn;"),
    TAG_HTMLESCAPE_TBL_ITEM(178, L"&sup2;"),
    TAG_HTMLESCAPE_TBL_ITEM(179, L"&sup3;"),
    TAG_HTMLESCAPE_TBL_ITEM(180, L"&acute;"),
    TAG_HTMLESCAPE_TBL_ITEM(181, L"&micro;"),
    TAG_HTMLESCAPE_TBL_ITEM(182, L"&para;"),
    TAG_HTMLESCAPE_TBL_ITEM(183, L"&middot;"),
    TAG_HTMLESCAPE_TBL_ITEM(184, L"&cedil;"),
    TAG_HTMLESCAPE_TBL_ITEM(185, L"&sup1;"),
    TAG_HTMLESCAPE_TBL_ITEM(186, L"&ordm;"),
    TAG_HTMLESCAPE_TBL_ITEM(187, L"&raquo;"),
    TAG_HTMLESCAPE_TBL_ITEM(188, L"&frac14;"),
    TAG_HTMLESCAPE_TBL_ITEM(189, L"&frac12;"),
    TAG_HTMLESCAPE_TBL_ITEM(190, L"&frac34;"),
    TAG_HTMLESCAPE_TBL_ITEM(191, L"&iquest;"),
    TAG_HTMLESCAPE_TBL_ITEM(192, L"&Agrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(193, L"&Aacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(194, L"&Acirc;"), ///< Acirc U+00C2 (194); "circ" is the separate entity for U+02C6
    TAG_HTMLESCAPE_TBL_ITEM(195, L"&Atilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(196, L"&Auml;"),
    TAG_HTMLESCAPE_TBL_ITEM(197, L"&Aring;"), ///< Aring U+00C5 (197)
    TAG_HTMLESCAPE_TBL_ITEM(198, L"&AElig;"),
    TAG_HTMLESCAPE_TBL_ITEM(199, L"&Ccedil;"),
    TAG_HTMLESCAPE_TBL_ITEM(200, L"&Egrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(201, L"&Eacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(202, L"&Ecirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(203, L"&Euml;"),
    TAG_HTMLESCAPE_TBL_ITEM(204, L"&Igrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(205, L"&Iacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(206, L"&Icirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(207, L"&Iuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(208, L"&ETH;"),
    TAG_HTMLESCAPE_TBL_ITEM(209, L"&Ntilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(210, L"&Ograve;"),
    TAG_HTMLESCAPE_TBL_ITEM(211, L"&Oacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(212, L"&Ocirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(213, L"&Otilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(214, L"&Ouml;"),
    TAG_HTMLESCAPE_TBL_ITEM(215, L"&times;"),
    TAG_HTMLESCAPE_TBL_ITEM(216, L"&Oslash;"),
    TAG_HTMLESCAPE_TBL_ITEM(217, L"&Ugrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(218, L"&Uacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(219, L"&Ucirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(220, L"&Uuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(221, L"&Yacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(222, L"&THORN;"),
    TAG_HTMLESCAPE_TBL_ITEM(223, L"&szlig;"),
    TAG_HTMLESCAPE_TBL_ITEM(224, L"&agrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(225, L"&aacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(226, L"&acirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(227, L"&atilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(228, L"&auml;"),
    TAG_HTMLESCAPE_TBL_ITEM(229, L"&aring;"),
    TAG_HTMLESCAPE_TBL_ITEM(230, L"&aelig;"),
    TAG_HTMLESCAPE_TBL_ITEM(231, L"&ccedil;"),
    TAG_HTMLESCAPE_TBL_ITEM(232, L"&egrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(233, L"&eacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(234, L"&ecirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(235, L"&euml;"),
    TAG_HTMLESCAPE_TBL_ITEM(236, L"&igrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(237, L"&iacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(238, L"&icirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(239, L"&iuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(240, L"&eth;"), ///< eth U+00F0 (240)
    TAG_HTMLESCAPE_TBL_ITEM(241, L"&ntilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(242, L"&ograve;"),
    TAG_HTMLESCAPE_TBL_ITEM(243, L"&oacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(244, L"&ocirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(245, L"&otilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(246, L"&ouml;"),
    TAG_HTMLESCAPE_TBL_ITEM(247, L"&divide;"),
    TAG_HTMLESCAPE_TBL_ITEM(248, L"&oslash;"),
    TAG_HTMLESCAPE_TBL_ITEM(249, L"&ugrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(250, L"&uacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(251, L"&ucirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(252, L"&uuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(253, L"&yacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(254, L"&thorn;"),
    TAG_HTMLESCAPE_TBL_ITEM(255, L"&yuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(338, L"&OElig;"), ///< OElig Œ U+0152 (338)
    TAG_HTMLESCAPE_TBL_ITEM(339, L"&oelig;"), ///< oelig œ U+0153 (339)
    TAG_HTMLESCAPE_TBL_ITEM(352, L"&Scaron;"), ///< Scaron Š U+0160 (352)
    TAG_HTMLESCAPE_TBL_ITEM(353, L"&scaron;"), ///< scaron š U+0161 (353)
    TAG_HTMLESCAPE_TBL_ITEM(376, L"&Yuml;"), ///< Yuml Ÿ U+0178 (376)
    TAG_HTMLESCAPE_TBL_ITEM(402, L"&fnof;"), ///< fnof ƒ U+0192 (402)
    TAG_HTMLESCAPE_TBL_ITEM(710, L"&circ;"), ///< circ ˆ U+02C6 (710)
    TAG_HTMLESCAPE_TBL_ITEM(732, L"&tilde;"), ///< tilde ˜ U+02DC (732)
    TAG_HTMLESCAPE_TBL_ITEM(913, L"&Alpha;"), ///< Alpha Α U+0391 (913)
    TAG_HTMLESCAPE_TBL_ITEM(914, L"&Beta;"), ///< Beta Β U+0392 (914)
    TAG_HTMLESCAPE_TBL_ITEM(915, L"&Gamma;"), ///< Gamma Γ U+0393 (915)
    TAG_HTMLESCAPE_TBL_ITEM(916, L"&Delta;"), ///< Delta Δ U+0394 (916)
    TAG_HTMLESCAPE_TBL_ITEM(917, L"&Epsilon;"), ///< Epsilon Ε U+0395 (917)
    TAG_HTMLESCAPE_TBL_ITEM(918, L"&Zeta;"), ///< Zeta Ζ U+0396 (918)
    TAG_HTMLESCAPE_TBL_ITEM(919, L"&Eta;"), ///< Eta Η U+0397 (919)
    TAG_HTMLESCAPE_TBL_ITEM(920, L"&Theta;"), ///< Theta Θ U+0398 (920)
    TAG_HTMLESCAPE_TBL_ITEM(921, L"&Iota;"), ///< Iota Ι U+0399 (921)
    TAG_HTMLESCAPE_TBL_ITEM(922, L"&Kappa;"), ///< Kappa Κ U+039A (922)
    TAG_HTMLESCAPE_TBL_ITEM(923, L"&Lambda;"), ///< Lambda Λ U+039B (923)
    TAG_HTMLESCAPE_TBL_ITEM(924, L"&Mu;"), ///< Mu Μ U+039C (924)
    TAG_HTMLESCAPE_TBL_ITEM(925, L"&Nu;"), ///< Nu Ν U+039D (925)
    TAG_HTMLESCAPE_TBL_ITEM(926, L"&Xi;"), ///< Xi Ξ U+039E (926)
    TAG_HTMLESCAPE_TBL_ITEM(927, L"&Omicron;"), ///< Omicron Ο U+039F (927)
    TAG_HTMLESCAPE_TBL_ITEM(928, L"&Pi;"), ///< Pi Π U+03A0 (928)
    TAG_HTMLESCAPE_TBL_ITEM(929, L"&Rho;"), ///< Rho Ρ U+03A1 (929)
    TAG_HTMLESCAPE_TBL_ITEM(931, L"&Sigma;"), ///< Sigma Σ U+03A3 (931)
    TAG_HTMLESCAPE_TBL_ITEM(932, L"&Tau;"), ///< Tau Τ U+03A4 (932)
    TAG_HTMLESCAPE_TBL_ITEM(933, L"&Upsilon;"), ///< Upsilon Υ U+03A5 (933)
    TAG_HTMLESCAPE_TBL_ITEM(934, L"&Phi;"), ///< Phi Φ U+03A6 (934)
    TAG_HTMLESCAPE_TBL_ITEM(935, L"&Chi;"), ///< Chi Χ U+03A7 (935)
    TAG_HTMLESCAPE_TBL_ITEM(936, L"&Psi;"), ///< Psi Ψ U+03A8 (936)
    TAG_HTMLESCAPE_TBL_ITEM(937, L"&Omega;"), ///< Omega Ω U+03A9 (937)
    TAG_HTMLESCAPE_TBL_ITEM(945, L"&alpha;"), ///< alpha α U+03B1 (945)
    TAG_HTMLESCAPE_TBL_ITEM(946, L"&beta;"), ///< beta β U+03B2 (946)
    TAG_HTMLESCAPE_TBL_ITEM(947, L"&gamma;"), ///< gamma γ U+03B3 (947)
    TAG_HTMLESCAPE_TBL_ITEM(948, L"&delta;"), ///< delta δ U+03B4 (948)
    TAG_HTMLESCAPE_TBL_ITEM(949, L"&epsilon;"), ///< epsilon ε U+03B5 (949)
    TAG_HTMLESCAPE_TBL_ITEM(950, L"&zeta;"), ///< zeta ζ U+03B6 (950)
    TAG_HTMLESCAPE_TBL_ITEM(951, L"&eta;"), ///< eta η U+03B7 (951)
    TAG_HTMLESCAPE_TBL_ITEM(952, L"&theta;"), ///< theta θ U+03B8 (952)
    TAG_HTMLESCAPE_TBL_ITEM(953, L"&iota;"), ///< iota ι U+03B9 (953)
    TAG_HTMLESCAPE_TBL_ITEM(954, L"&kappa;"), ///< kappa κ U+03BA (954)
    TAG_HTMLESCAPE_TBL_ITEM(955, L"&lambda;"), ///< lambda λ U+03BB (955)
    TAG_HTMLESCAPE_TBL_ITEM(956, L"&mu;"), ///< mu μ U+03BC (956)
    TAG_HTMLESCAPE_TBL_ITEM(957, L"&nu;"), ///< nu ν U+03BD (957)
    TAG_HTMLESCAPE_TBL_ITEM(958, L"&xi;"), ///< xi ξ U+03BE (958)
    TAG_HTMLESCAPE_TBL_ITEM(959, L"&omicron;"), ///< omicron ο U+03BF (959)
    TAG_HTMLESCAPE_TBL_ITEM(960, L"&pi;"), ///< pi π U+03C0 (960)
    TAG_HTMLESCAPE_TBL_ITEM(961, L"&rho;"), ///< rho ρ U+03C1 (961)
    TAG_HTMLESCAPE_TBL_ITEM(962, L"&sigmaf;"), ///< sigmaf ς U+03C2 (962)
    TAG_HTMLESCAPE_TBL_ITEM(963, L"&sigma;"), ///< sigma σ U+03C3 (963)
    TAG_HTMLESCAPE_TBL_ITEM(964, L"&tau;"), ///< tau τ U+03C4 (964)
    TAG_HTMLESCAPE_TBL_ITEM(965, L"&upsilon;"), ///< upsilon υ U+03C5 (965)
    TAG_HTMLESCAPE_TBL_ITEM(966, L"&phi;"), ///< phi φ U+03C6 (966)
    TAG_HTMLESCAPE_TBL_ITEM(967, L"&chi;"), ///< chi χ U+03C7 (967)
    TAG_HTMLESCAPE_TBL_ITEM(968, L"&psi;"), ///< psi ψ U+03C8 (968)
    TAG_HTMLESCAPE_TBL_ITEM(969, L"&omega;"), ///< omega ω U+03C9 (969)
    TAG_HTMLESCAPE_TBL_ITEM(977, L"&thetasym;"), ///< thetasym ϑ U+03D1 (977)
    TAG_HTMLESCAPE_TBL_ITEM(978, L"&upsih;"), ///< upsih ϒ U+03D2 (978)
    TAG_HTMLESCAPE_TBL_ITEM(982, L"&piv;"), ///< piv ϖ U+03D6 (982)
    TAG_HTMLESCAPE_TBL_ITEM(8194, L"&ensp;"), ///< ensp U+2002 (8194)
    TAG_HTMLESCAPE_TBL_ITEM(8195, L"&emsp;"), ///< emsp U+2003 (8195)
    TAG_HTMLESCAPE_TBL_ITEM(8201, L"&thinsp;"), ///< thinsp U+2009 (8201)
    TAG_HTMLESCAPE_TBL_ITEM(8204, L"&zwnj;"), ///< zwnj U+200C (8204)
    TAG_HTMLESCAPE_TBL_ITEM(8205, L"&zwj;"), ///< zwj U+200D (8205)
    TAG_HTMLESCAPE_TBL_ITEM(8206, L"&lrm;"), ///< lrm U+200E (8206)
    TAG_HTMLESCAPE_TBL_ITEM(8207, L"&rlm;"), ///< rlm U+200F (8207)
    TAG_HTMLESCAPE_TBL_ITEM(8211, L"&ndash;"), ///< ndash – U+2013 (8211)
    TAG_HTMLESCAPE_TBL_ITEM(8212, L"&mdash;"), ///< mdash — U+2014 (8212)
    TAG_HTMLESCAPE_TBL_ITEM(8216, L"&lsquo;"), ///< lsquo ‘ U+2018 (8216)
    TAG_HTMLESCAPE_TBL_ITEM(8217, L"&rsquo;"), ///< rsquo ’ U+2019 (8217)
    TAG_HTMLESCAPE_TBL_ITEM(8218, L"&sbquo;"), ///< sbquo ‚ U+201A (8218)
    TAG_HTMLESCAPE_TBL_ITEM(8220, L"&ldquo;"), ///< ldquo “ U+201C (8220)
    TAG_HTMLESCAPE_TBL_ITEM(8221, L"&rdquo;"), ///< rdquo ” U+201D (8221)
    TAG_HTMLESCAPE_TBL_ITEM(8222, L"&bdquo;"), ///< bdquo „ U+201E (8222)
    TAG_HTMLESCAPE_TBL_ITEM(8224, L"&dagger;"), ///< dagger † U+2020 (8224)
    TAG_HTMLESCAPE_TBL_ITEM(8225, L"&Dagger;"), ///< Dagger ‡ U+2021 (8225)
    TAG_HTMLESCAPE_TBL_ITEM(8226, L"&bull;"), ///< bull • U+2022 (8226)
    TAG_HTMLESCAPE_TBL_ITEM(8230, L"&hellip;"), ///< hellip … U+2026 (8230)
    TAG_HTMLESCAPE_TBL_ITEM(8240, L"&permil;"), ///< permil ‰ U+2030 (8240)
    TAG_HTMLESCAPE_TBL_ITEM(8242, L"&prime;"), ///< prime ′ U+2032 (8242)
    TAG_HTMLESCAPE_TBL_ITEM(8243, L"&Prime;"), ///< Prime ″ U+2033 (8243)
    TAG_HTMLESCAPE_TBL_ITEM(8249, L"&lsaquo;"), ///< lsaquo ‹ U+2039 (8249)
    TAG_HTMLESCAPE_TBL_ITEM(8250, L"&rsaquo;"), ///< rsaquo › U+203A (8250)
    TAG_HTMLESCAPE_TBL_ITEM(8254, L"&oline;"), ///< oline ‾ U+203E (8254)
    TAG_HTMLESCAPE_TBL_ITEM(8260, L"&frasl;"), ///< frasl ⁄ U+2044 (8260)
    TAG_HTMLESCAPE_TBL_ITEM(8364, L"&euro;"), ///< euro € U+20AC (8364)
    TAG_HTMLESCAPE_TBL_ITEM(8465, L"&image;"), ///< image ℑ U+2111 (8465)
    TAG_HTMLESCAPE_TBL_ITEM(8472, L"&weierp;"), ///< weierp ℘ U+2118 (8472)
    TAG_HTMLESCAPE_TBL_ITEM(8476, L"&real;"), ///< real ℜ U+211C (8476)
    TAG_HTMLESCAPE_TBL_ITEM(8482, L"&trade;"), ///< trade ™ U+2122 (8482)
    TAG_HTMLESCAPE_TBL_ITEM(8501, L"&alefsym;"), ///< alefsym ℵ U+2135 (8501)
    TAG_HTMLESCAPE_TBL_ITEM(8592, L"&larr;"), ///< larr ← U+2190 (8592)
    TAG_HTMLESCAPE_TBL_ITEM(8593, L"&uarr;"), ///< uarr ↑ U+2191 (8593)
    TAG_HTMLESCAPE_TBL_ITEM(8594, L"&rarr;"), ///< rarr → U+2192 (8594)
    TAG_HTMLESCAPE_TBL_ITEM(8595, L"&darr;"), ///< darr ↓ U+2193 (8595)
    TAG_HTMLESCAPE_TBL_ITEM(8596, L"&harr;"), ///< harr ↔ U+2194 (8596)
    TAG_HTMLESCAPE_TBL_ITEM(8629, L"&crarr;"), ///< crarr ↵ U+21B5 (8629)
    TAG_HTMLESCAPE_TBL_ITEM(8656, L"&lArr;"), ///< lArr ⇐ U+21D0 (8656)
    TAG_HTMLESCAPE_TBL_ITEM(8657, L"&uArr;"), ///< uArr ⇑ U+21D1 (8657)
    TAG_HTMLESCAPE_TBL_ITEM(8658, L"&rArr;"), ///< rArr ⇒ U+21D2 (8658)
    TAG_HTMLESCAPE_TBL_ITEM(8659, L"&dArr;"), ///< dArr ⇓ U+21D3 (8659)
    TAG_HTMLESCAPE_TBL_ITEM(8660, L"&hArr;"), ///< hArr ⇔ U+21D4 (8660)
    TAG_HTMLESCAPE_TBL_ITEM(8704, L"&forall;"), ///< forall ∀ U+2200 (8704)
    TAG_HTMLESCAPE_TBL_ITEM(8706, L"&part;"), ///< part ∂ U+2202 (8706)
    TAG_HTMLESCAPE_TBL_ITEM(8707, L"&exist;"), ///< exist ∃ U+2203 (8707)
    TAG_HTMLESCAPE_TBL_ITEM(8709, L"&empty;"), ///< empty ∅ U+2205 (8709)
    TAG_HTMLESCAPE_TBL_ITEM(8711, L"&nabla;"), ///< nabla ∇ U+2207 (8711)
    TAG_HTMLESCAPE_TBL_ITEM(8712, L"&isin;"), ///< isin ∈ U+2208 (8712)
    TAG_HTMLESCAPE_TBL_ITEM(8713, L"&notin;"), ///< notin ∉ U+2209 (8713)
    TAG_HTMLESCAPE_TBL_ITEM(8715, L"&ni;"), ///< ni ∋ U+220B (8715)
    TAG_HTMLESCAPE_TBL_ITEM(8719, L"&prod;"), ///< prod ∏ U+220F (8719)
    TAG_HTMLESCAPE_TBL_ITEM(8721, L"&sum;"), ///< sum ∑ U+2211 (8721)
    TAG_HTMLESCAPE_TBL_ITEM(8722, L"&minus;"), ///< minus − U+2212 (8722)
    TAG_HTMLESCAPE_TBL_ITEM(8727, L"&lowast;"), ///< lowast ∗ U+2217 (8727)
    TAG_HTMLESCAPE_TBL_ITEM(8730, L"&radic;"), ///< radic √ U+221A (8730)
    TAG_HTMLESCAPE_TBL_ITEM(8733, L"&prop;"), ///< prop ∝ U+221D (8733)
    TAG_HTMLESCAPE_TBL_ITEM(8734, L"&infin;"), ///< infin ∞ U+221E (8734)
    TAG_HTMLESCAPE_TBL_ITEM(8736, L"&ang;"), ///< ang ∠ U+2220 (8736)
    TAG_HTMLESCAPE_TBL_ITEM(8743, L"&and;"), ///< and ∧ U+2227 (8743)
    TAG_HTMLESCAPE_TBL_ITEM(8744, L"&or;"), ///< or ∨ U+2228 (8744)
    TAG_HTMLESCAPE_TBL_ITEM(8745, L"&cap;"), ///< cap ∩ U+2229 (8745)
    TAG_HTMLESCAPE_TBL_ITEM(8746, L"&cup;"), ///< cup ∪ U+222A (8746)
    TAG_HTMLESCAPE_TBL_ITEM(8747, L"&int;"), ///< int ∫ U+222B (8747)
    TAG_HTMLESCAPE_TBL_ITEM(8756, L"&there4;"), ///< there4 ∴ U+2234 (8756)
    TAG_HTMLESCAPE_TBL_ITEM(8764, L"&sim;"), ///< sim ∼ U+223C (8764)
    TAG_HTMLESCAPE_TBL_ITEM(8773, L"&cong;"), ///< cong ≅ U+2245 (8773)
    TAG_HTMLESCAPE_TBL_ITEM(8776, L"&asymp;"), ///< asymp ≈ U+2248 (8776)
    TAG_HTMLESCAPE_TBL_ITEM(8800, L"&ne;"), ///< ne ≠ U+2260 (8800)
    TAG_HTMLESCAPE_TBL_ITEM(8801, L"&equiv;"), ///< equiv ≡ U+2261 (8801)
    TAG_HTMLESCAPE_TBL_ITEM(8804, L"&le;"), ///< le ≤ U+2264 (8804)
    TAG_HTMLESCAPE_TBL_ITEM(8805, L"&ge;"), ///< ge ≥ U+2265 (8805)
    TAG_HTMLESCAPE_TBL_ITEM(8834, L"&sub;"), ///< sub ⊂ U+2282 (8834)
    TAG_HTMLESCAPE_TBL_ITEM(8835, L"&sup;"), ///< sup ⊃ U+2283 (8835)
    TAG_HTMLESCAPE_TBL_ITEM(8836, L"&nsub;"), ///< nsub ⊄ U+2284 (8836)
    TAG_HTMLESCAPE_TBL_ITEM(8838, L"&sube;"), ///< sube ⊆ U+2286 (8838)
    TAG_HTMLESCAPE_TBL_ITEM(8839, L"&supe;"), ///< supe ⊇ U+2287 (8839)
    TAG_HTMLESCAPE_TBL_ITEM(8853, L"&oplus;"), ///< oplus ⊕ U+2295 (8853)
    TAG_HTMLESCAPE_TBL_ITEM(8855, L"&otimes;"), ///< otimes ⊗ U+2297 (8855)
    TAG_HTMLESCAPE_TBL_ITEM(8869, L"&perp;"), ///< perp ⊥ U+22A5 (8869)
    TAG_HTMLESCAPE_TBL_ITEM(8901, L"&sdot;"), ///< sdot ⋅ U+22C5 (8901)
    TAG_HTMLESCAPE_TBL_ITEM(8968, L"&lceil;"), ///< lceil ⌈ U+2308 (8968)
    TAG_HTMLESCAPE_TBL_ITEM(8969, L"&rceil;"), ///< rceil ⌉ U+2309 (8969)
    TAG_HTMLESCAPE_TBL_ITEM(8970, L"&lfloor;"), ///< lfloor ⌊ U+230A (8970)
    TAG_HTMLESCAPE_TBL_ITEM(8971, L"&rfloor;"), ///< rfloor ⌋ U+230B (8971)
    TAG_HTMLESCAPE_TBL_ITEM(9001, L"&lang;"), ///< lang 〈 U+2329 (9001)
    TAG_HTMLESCAPE_TBL_ITEM(9002, L"&rang;"), ///< rang 〉 U+232A (9002)
    TAG_HTMLESCAPE_TBL_ITEM(9674, L"&loz;"), ///< loz ◊ U+25CA (9674)
    TAG_HTMLESCAPE_TBL_ITEM(9824, L"&spades;"), ///< spades ♠ U+2660 (9824)
    TAG_HTMLESCAPE_TBL_ITEM(9827, L"&clubs;"), ///< clubs ♣ U+2663 (9827)
    TAG_HTMLESCAPE_TBL_ITEM(9829, L"&hearts;"), ///< hearts ♥ U+2665 (9829)
    TAG_HTMLESCAPE_TBL_ITEM(9830, L"&diams;"), ///< diams ♦ U+2666 (9830)
};
// 9830 is the largest character value listed in g_HtmlEscapeTbl.
// The character value itself is used as the index into
// g_pArr_tag_htmlescape_tbl_item, so the array needs 9830 + 1 slots; any
// character >= this constant has no table entry.
#define MAX_VALUE_HTML_ESCAPE_CHAR (9830 + 1)
namespace ns_base
{
void init_string_convert_escape_character()
{
int iIndex = 0;
int iPos = 0;
if (NULL == g_pArr_tag_htmlescape_tbl_item)
{
g_pArr_tag_htmlescape_tbl_item = new TAG_HTMLESCAPE_TBL_ITEM[MAX_VALUE_HTML_ESCAPE_CHAR];
for (iIndex = 0; iIndex < g_iSize_HtmlEscapeTbl; iIndex++)
{
iPos = g_HtmlEscapeTbl[iIndex].cValue;
g_pArr_tag_htmlescape_tbl_item[iPos].bValidData = TRUE;
g_pArr_tag_htmlescape_tbl_item[iPos].cValue = g_HtmlEscapeTbl[iIndex].cValue;
g_pArr_tag_htmlescape_tbl_item[iPos].strHtmlEscape = g_HtmlEscapeTbl[iIndex].strHtmlEscape.c_str();
}
}
}
void uninit_string_convert_escape_character()
{
SAFE_DELETE_ARRAY(g_pArr_tag_htmlescape_tbl_item);
}
std::string string_convert_escape_character_a(IN std::string strIn)
{
std::wstring strTemp = L"";
std::string strOutA = "";
strTemp = string_convert_escape_character_w(ns_base::A2Wex(strIn.c_str()));
strOutA = ns_base::W2Aex(strTemp.c_str());
return strOutA;
}
std::wstring string_convert_escape_character_w(IN std::wstring strIn)
{
std::wstring strTemp = L"";
string2HtmlEscape(strIn, strTemp);
return strTemp;
}
BOOL string2HtmlEscape(IN const std::wstring& strIn, OUT std::wstring& strHtmlEscape)
{
size_t nIndex = 0;
WCHAR cCur = L'\0';
size_t nLenInW = _tcslen(strIn.c_str());
std::wstring strTemp = L"";
strHtmlEscape = L"";
for (nIndex = 0; nIndex < nLenInW; nIndex++)
{
cCur = strIn[nIndex];
char2HtmlEscape(cCur, strTemp);
strHtmlEscape += strTemp.c_str();
}
return TRUE;
}
BOOL char2HtmlEscape(IN WCHAR cIn, OUT std::wstring& strHtmlEscape)
{
if (IsHtmlEscape(cIn))
{
strHtmlEscape = g_pArr_tag_htmlescape_tbl_item[cIn].strHtmlEscape.c_str();
}
else
{
strHtmlEscape = cIn;
}
return TRUE;
}
BOOL IsHtmlEscape(IN WCHAR cIn)
{
BOOL bRc = FALSE;
do
{
if ((cIn >= MAX_VALUE_HTML_ESCAPE_CHAR)
|| (!g_pArr_tag_htmlescape_tbl_item[cIn].bValidData))
{
break;
}
bRc = TRUE;
} while (0);
return bRc;
}
}