html字符串转义的函数封装

前几天,需要对输入的xml节点的数据进行html转义,否则导致后续操作失败.

节点内容是第三方的, 有可能输入所有可见字符.

前段时间, 只对几个常用的html字符进行了转义. 结果输入内容中不时会出现尚未处理的、需要转义的字符, 导致后续操作失败.

这几天,可以整理工程了。对html串转义操作,进行了封装。以后再也不会出现由于未进行html字符串转义引起的操作失败.


html转义字符集的定义 : http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references

工程下载点: testcase_HtmlEscape_2015_0415_1722.zip


// testcase_HtmlEscape.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "HtmlEscapeHelper/HtmlEscapeHelper.h"

/// Console demo for the HTML escape helper.
///
/// Builds a URL-like wide string that contains the characters U+00A9 and
/// U+00AE, escapes it with ns_base::string_convert_escape_character_w and
/// prints the string before and after escaping.
///
/// @param argc unused
/// @param argv unused
/// @return always 0
int _tmain(int argc, _TCHAR* argv[])
{
    std::wstring         strHtml = L"";
    std::wstring         strHtmlEscape = L"";

    // _tsetlocale( LC_ALL, _T( "chs" ) );
    _tsetlocale(LC_CTYPE, L".936");    //< code page 936, so the console can print Chinese text
    ns_base::init_string_convert_escape_character();

//     TCHAR pszTest[] = _T("123Abc汉字€鴕㎜㎎㊣㎞㏄㏑㏕㏒"); 
//     pszTest[1] = (WCHAR)169; ///< characters that need escaping cannot be printed, they show up as ? ...
//     pszTest[2] = (WCHAR)174;
//     _tprintf(L"%s\r\n", pszTest);
//     strTemp = pszTest;
//     _tprintf(L"%s\r\n", strTemp.c_str());

    /// build a html string that contains chars which need escaping
    strHtml = L"http://test.com/index.php&name=";
    strHtml += (WCHAR)169; ///< copyright sign
    strHtml += L"someone";
    strHtml += (WCHAR)174; ///< registered sign
    // (the former `strHtml += L"\0";` appended an empty C-string, i.e. nothing, and was dropped)
    _tprintf(L"%s\r\n", strHtml.c_str());

    /// the escaped text is mainly meant to be used as xml node content
    _tprintf(L"html escape begin:\r\n");
    strHtmlEscape = ns_base::string_convert_escape_character_w(strHtml);

    _tprintf(L"before escape: %s\r\n after escape: %s\r\n", 
        strHtml.c_str(), 
        strHtmlEscape.c_str());

    ns_base::uninit_string_convert_escape_character();
    // NOTE(review): narrow printf is mixed with wide _tprintf on the same stream;
    // this is what the original did and the recorded output below shows it working.
    printf("结束, press any key to quit\r\n");

    /** run result
    http://test.com/index.php&name=?someone?
    html escape begin:
    before escape: http://test.com/index.php&name=?someone?
    after escape: http://test.com/index.php&name=&copy;someone&reg;
    结束, press any key to quit
    */
    getwchar();
    return 0;
}


/// @file       HtmlEscapeHelper.h 
/// @brief      对html字符串进行转义

#include "stdafx.h"

#ifndef __HTML_ESCAPE_HELPER_H__
#define __HTML_ESCAPE_HELPER_H__

namespace ns_base
{
    /// Lifetime management of the lookup table used by the escape functions.
    ///
    /// For fast lookup the implementation keeps a large array allocated with
    /// new[], whose elements are of type TAG_HTMLESCAPE_TBL_ITEM; only part of
    /// the elements hold valid data.
    /// Call init_string_convert_escape_character() when the program starts (allocates the array)
    /// and uninit_string_convert_escape_character() when it exits (releases the array).
    void init_string_convert_escape_character();
    void uninit_string_convert_escape_character();

    /// Return an html-escaped copy of a string.
    /// The _w variant works on wide strings; the _a variant takes a narrow
    /// string and returns a narrow string.
    std::wstring string_convert_escape_character_w(IN std::wstring strIn);
    std::string string_convert_escape_character_a(IN std::string strIn);

    /// Html-escape a whole string.
    /// @param strIn          text to escape
    /// @param strHtmlEscape  receives the escaped text (previous content is replaced)
    /// @return TRUE on success
    BOOL string2HtmlEscape(IN const std::wstring& strIn, OUT std::wstring& strHtmlEscape);

    /// Html-escape a single character.
    /// @param cIn            character to escape
    /// @param strHtmlEscape  receives the escape text for cIn, or cIn itself
    ///                       when the character has no entry in the escape table
    /// @return TRUE on success
    BOOL char2HtmlEscape(IN WCHAR cIn, OUT std::wstring& strHtmlEscape);

    /// Tell whether a character is one that has an html escape.
    /// @return TRUE when cIn has a valid entry in the escape table
    BOOL IsHtmlEscape(IN WCHAR cIn);
}

#endif // #ifndef __HTML_ESCAPE_HELPER_H__

/// @file       HtmlEscapeHelper.cpp

#include "stdafx.h"
#include "HtmlEscapeHelper.h"
#include "StringHelper.h"
#include "constDefine_BaseSupportLayer.h"

/// 放html字符和转义字符串对应关系的结构体
typedef struct _tag_HtmlEscape_Tbl_Item
{
    BOOL                bValidData;         ///< 该数据是否有效
    WCHAR               cValue;             ///< 字符值
    std::wstring        strHtmlEscape;      ///< 对应的转义字符

    _tag_HtmlEscape_Tbl_Item()
    {
        bValidData = FALSE;
        cValue = L'\0';
        strHtmlEscape = L"";
    }

    _tag_HtmlEscape_Tbl_Item(WCHAR cValue, WCHAR* pcHtmlEscape)
    {
        this->bValidData = TRUE;
        this->cValue = cValue;
        this->strHtmlEscape = (NULL != pcHtmlEscape) ? pcHtmlEscape : L"";
    }
}TAG_HTMLESCAPE_TBL_ITEM;

/// Lookup array indexed by character value. It starts out as NULL, is allocated by
/// ns_base::init_string_convert_escape_character() and is released through
/// SAFE_DELETE_ARRAY in ns_base::uninit_string_convert_escape_character().
TAG_HTMLESCAPE_TBL_ITEM* g_pArr_tag_htmlescape_tbl_item = NULL;

/// Source of the complete character entity list:
/// http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
///
/// Each row maps a character value to its named character entity reference.
/// The right-hand side must be the literal "&name;" text; a literal character
/// there would make the escape a no-op.
/// NOTE(review): only quot, amp, apos, lt and gt are predefined in XML itself;
/// the remaining names are HTML entities - confirm that the consumer of the
/// escaped text accepts them.

const int g_iSize_HtmlEscapeTbl = 253;
const TAG_HTMLESCAPE_TBL_ITEM g_HtmlEscapeTbl[g_iSize_HtmlEscapeTbl] = 
{
    TAG_HTMLESCAPE_TBL_ITEM(34, L"&quot;"),
    TAG_HTMLESCAPE_TBL_ITEM(38, L"&amp;"),
    TAG_HTMLESCAPE_TBL_ITEM(39, L"&apos;"),
    TAG_HTMLESCAPE_TBL_ITEM(60, L"&lt;"),
    TAG_HTMLESCAPE_TBL_ITEM(62, L"&gt;"),

    TAG_HTMLESCAPE_TBL_ITEM(160, L"&nbsp;"),
    TAG_HTMLESCAPE_TBL_ITEM(161, L"&iexcl;"),
    TAG_HTMLESCAPE_TBL_ITEM(162, L"&cent;"),
    TAG_HTMLESCAPE_TBL_ITEM(163, L"&pound;"),
    TAG_HTMLESCAPE_TBL_ITEM(164, L"&curren;"),
    TAG_HTMLESCAPE_TBL_ITEM(165, L"&yen;"),
    TAG_HTMLESCAPE_TBL_ITEM(166, L"&brvbar;"),
    TAG_HTMLESCAPE_TBL_ITEM(167, L"&sect;"),
    TAG_HTMLESCAPE_TBL_ITEM(168, L"&uml;"),
    TAG_HTMLESCAPE_TBL_ITEM(169, L"&copy;"),
    TAG_HTMLESCAPE_TBL_ITEM(170, L"&ordf;"),
    TAG_HTMLESCAPE_TBL_ITEM(171, L"&laquo;"),
    TAG_HTMLESCAPE_TBL_ITEM(172, L"&not;"),
    TAG_HTMLESCAPE_TBL_ITEM(173, L"&shy;"),
    TAG_HTMLESCAPE_TBL_ITEM(174, L"&reg;"),
    TAG_HTMLESCAPE_TBL_ITEM(175, L"&macr;"),
    TAG_HTMLESCAPE_TBL_ITEM(176, L"&deg;"),
    TAG_HTMLESCAPE_TBL_ITEM(177, L"&plusmn;"),
    TAG_HTMLESCAPE_TBL_ITEM(178, L"&sup2;"),
    TAG_HTMLESCAPE_TBL_ITEM(179, L"&sup3;"),
    TAG_HTMLESCAPE_TBL_ITEM(180, L"&acute;"),
    TAG_HTMLESCAPE_TBL_ITEM(181, L"&micro;"),
    TAG_HTMLESCAPE_TBL_ITEM(182, L"&para;"),
    TAG_HTMLESCAPE_TBL_ITEM(183, L"&middot;"),
    TAG_HTMLESCAPE_TBL_ITEM(184, L"&cedil;"),
    TAG_HTMLESCAPE_TBL_ITEM(185, L"&sup1;"),
    TAG_HTMLESCAPE_TBL_ITEM(186, L"&ordm;"),
    TAG_HTMLESCAPE_TBL_ITEM(187, L"&raquo;"),
    TAG_HTMLESCAPE_TBL_ITEM(188, L"&frac14;"),
    TAG_HTMLESCAPE_TBL_ITEM(189, L"&frac12;"),
    TAG_HTMLESCAPE_TBL_ITEM(190, L"&frac34;"),
    TAG_HTMLESCAPE_TBL_ITEM(191, L"&iquest;"),
    TAG_HTMLESCAPE_TBL_ITEM(192, L"&Agrave;"),

    TAG_HTMLESCAPE_TBL_ITEM(193, L"&Aacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(194, L"&Acirc;"),   // was mapped to the circ entity (U+02C6) by mistake
    TAG_HTMLESCAPE_TBL_ITEM(195, L"&Atilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(196, L"&Auml;"),
    TAG_HTMLESCAPE_TBL_ITEM(197, L"&Aring;"),   // was mapped to the ring entity (U+02DA) by mistake
    TAG_HTMLESCAPE_TBL_ITEM(198, L"&AElig;"),
    TAG_HTMLESCAPE_TBL_ITEM(199, L"&Ccedil;"),
    TAG_HTMLESCAPE_TBL_ITEM(200, L"&Egrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(201, L"&Eacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(202, L"&Ecirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(203, L"&Euml;"),
    TAG_HTMLESCAPE_TBL_ITEM(204, L"&Igrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(205, L"&Iacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(206, L"&Icirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(207, L"&Iuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(208, L"&ETH;"),
    TAG_HTMLESCAPE_TBL_ITEM(209, L"&Ntilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(210, L"&Ograve;"),
    TAG_HTMLESCAPE_TBL_ITEM(211, L"&Oacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(212, L"&Ocirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(213, L"&Otilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(214, L"&Ouml;"),
    TAG_HTMLESCAPE_TBL_ITEM(215, L"&times;"),
    TAG_HTMLESCAPE_TBL_ITEM(216, L"&Oslash;"),
    TAG_HTMLESCAPE_TBL_ITEM(217, L"&Ugrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(218, L"&Uacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(219, L"&Ucirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(220, L"&Uuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(221, L"&Yacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(222, L"&THORN;"),
    TAG_HTMLESCAPE_TBL_ITEM(223, L"&szlig;"),
    TAG_HTMLESCAPE_TBL_ITEM(224, L"&agrave;"),

    TAG_HTMLESCAPE_TBL_ITEM(225, L"&aacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(226, L"&acirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(227, L"&atilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(228, L"&auml;"),
    TAG_HTMLESCAPE_TBL_ITEM(229, L"&aring;"),
    TAG_HTMLESCAPE_TBL_ITEM(230, L"&aelig;"),
    TAG_HTMLESCAPE_TBL_ITEM(231, L"&ccedil;"),
    TAG_HTMLESCAPE_TBL_ITEM(232, L"&egrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(233, L"&eacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(234, L"&ecirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(235, L"&euml;"),
    TAG_HTMLESCAPE_TBL_ITEM(236, L"&igrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(237, L"&iacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(238, L"&icirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(239, L"&iuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(240, L"&eth;"),     // was "&ieth;", which is not a defined entity
    TAG_HTMLESCAPE_TBL_ITEM(241, L"&ntilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(242, L"&ograve;"),
    TAG_HTMLESCAPE_TBL_ITEM(243, L"&oacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(244, L"&ocirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(245, L"&otilde;"),
    TAG_HTMLESCAPE_TBL_ITEM(246, L"&ouml;"),
    TAG_HTMLESCAPE_TBL_ITEM(247, L"&divide;"),
    TAG_HTMLESCAPE_TBL_ITEM(248, L"&oslash;"),
    TAG_HTMLESCAPE_TBL_ITEM(249, L"&ugrave;"),
    TAG_HTMLESCAPE_TBL_ITEM(250, L"&uacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(251, L"&ucirc;"),
    TAG_HTMLESCAPE_TBL_ITEM(252, L"&uuml;"),
    TAG_HTMLESCAPE_TBL_ITEM(253, L"&yacute;"),
    TAG_HTMLESCAPE_TBL_ITEM(254, L"&thorn;"),
    TAG_HTMLESCAPE_TBL_ITEM(255, L"&yuml;"),

    TAG_HTMLESCAPE_TBL_ITEM(338, L"&OElig;"), ///< OElig, U+0152 (338)
    TAG_HTMLESCAPE_TBL_ITEM(339, L"&oelig;"), ///< oelig, U+0153 (339)
    TAG_HTMLESCAPE_TBL_ITEM(352, L"&Scaron;"), ///< Scaron, U+0160 (352)
    TAG_HTMLESCAPE_TBL_ITEM(353, L"&scaron;"), ///< scaron, U+0161 (353)
    TAG_HTMLESCAPE_TBL_ITEM(376, L"&Yuml;"), ///< Yuml, U+0178 (376)

    TAG_HTMLESCAPE_TBL_ITEM(402, L"&fnof;"), ///< fnof, U+0192 (402)

    TAG_HTMLESCAPE_TBL_ITEM(710, L"&circ;"), ///< circ, U+02C6 (710)
    TAG_HTMLESCAPE_TBL_ITEM(732, L"&tilde;"), ///< tilde, U+02DC (732)

    TAG_HTMLESCAPE_TBL_ITEM(913, L"&Alpha;"), ///< Alpha, U+0391 (913)
    TAG_HTMLESCAPE_TBL_ITEM(914, L"&Beta;"), ///< Beta, U+0392 (914)
    TAG_HTMLESCAPE_TBL_ITEM(915, L"&Gamma;"), ///< Gamma, U+0393 (915)
    TAG_HTMLESCAPE_TBL_ITEM(916, L"&Delta;"), ///< Delta, U+0394 (916)
    TAG_HTMLESCAPE_TBL_ITEM(917, L"&Epsilon;"), ///< Epsilon, U+0395 (917)
    TAG_HTMLESCAPE_TBL_ITEM(918, L"&Zeta;"), ///< Zeta, U+0396 (918)
    TAG_HTMLESCAPE_TBL_ITEM(919, L"&Eta;"), ///< Eta, U+0397 (919)
    TAG_HTMLESCAPE_TBL_ITEM(920, L"&Theta;"), ///< Theta, U+0398 (920)
    TAG_HTMLESCAPE_TBL_ITEM(921, L"&Iota;"), ///< Iota, U+0399 (921)
    TAG_HTMLESCAPE_TBL_ITEM(922, L"&Kappa;"), ///< Kappa, U+039A (922)
    TAG_HTMLESCAPE_TBL_ITEM(923, L"&Lambda;"), ///< Lambda, U+039B (923)
    TAG_HTMLESCAPE_TBL_ITEM(924, L"&Mu;"), ///< Mu, U+039C (924)
    TAG_HTMLESCAPE_TBL_ITEM(925, L"&Nu;"), ///< Nu, U+039D (925)
    TAG_HTMLESCAPE_TBL_ITEM(926, L"&Xi;"), ///< Xi, U+039E (926)
    TAG_HTMLESCAPE_TBL_ITEM(927, L"&Omicron;"), ///< Omicron, U+039F (927)
    TAG_HTMLESCAPE_TBL_ITEM(928, L"&Pi;"), ///< Pi, U+03A0 (928)
    TAG_HTMLESCAPE_TBL_ITEM(929, L"&Rho;"), ///< Rho, U+03A1 (929)
    TAG_HTMLESCAPE_TBL_ITEM(931, L"&Sigma;"), ///< Sigma, U+03A3 (931)
    TAG_HTMLESCAPE_TBL_ITEM(932, L"&Tau;"), ///< Tau, U+03A4 (932)
    TAG_HTMLESCAPE_TBL_ITEM(933, L"&Upsilon;"), ///< Upsilon, U+03A5 (933)
    TAG_HTMLESCAPE_TBL_ITEM(934, L"&Phi;"), ///< Phi, U+03A6 (934)
    TAG_HTMLESCAPE_TBL_ITEM(935, L"&Chi;"), ///< Chi, U+03A7 (935)
    TAG_HTMLESCAPE_TBL_ITEM(936, L"&Psi;"), ///< Psi, U+03A8 (936)
    TAG_HTMLESCAPE_TBL_ITEM(937, L"&Omega;"), ///< Omega, U+03A9 (937)
    TAG_HTMLESCAPE_TBL_ITEM(945, L"&alpha;"), ///< alpha, U+03B1 (945)
    TAG_HTMLESCAPE_TBL_ITEM(946, L"&beta;"), ///< beta, U+03B2 (946)
    TAG_HTMLESCAPE_TBL_ITEM(947, L"&gamma;"), ///< gamma, U+03B3 (947)
    TAG_HTMLESCAPE_TBL_ITEM(948, L"&delta;"), ///< delta, U+03B4 (948)
    TAG_HTMLESCAPE_TBL_ITEM(949, L"&epsilon;"), ///< epsilon, U+03B5 (949)
    TAG_HTMLESCAPE_TBL_ITEM(950, L"&zeta;"), ///< zeta, U+03B6 (950)
    TAG_HTMLESCAPE_TBL_ITEM(951, L"&eta;"), ///< eta, U+03B7 (951)
    TAG_HTMLESCAPE_TBL_ITEM(952, L"&theta;"), ///< theta, U+03B8 (952)
    TAG_HTMLESCAPE_TBL_ITEM(953, L"&iota;"), ///< iota, U+03B9 (953)
    TAG_HTMLESCAPE_TBL_ITEM(954, L"&kappa;"), ///< kappa, U+03BA (954)
    TAG_HTMLESCAPE_TBL_ITEM(955, L"&lambda;"), ///< lambda, U+03BB (955)
    TAG_HTMLESCAPE_TBL_ITEM(956, L"&mu;"), ///< mu, U+03BC (956)
    TAG_HTMLESCAPE_TBL_ITEM(957, L"&nu;"), ///< nu, U+03BD (957)
    TAG_HTMLESCAPE_TBL_ITEM(958, L"&xi;"), ///< xi, U+03BE (958)
    TAG_HTMLESCAPE_TBL_ITEM(959, L"&omicron;"), ///< omicron, U+03BF (959)
    TAG_HTMLESCAPE_TBL_ITEM(960, L"&pi;"), ///< pi, U+03C0 (960)
    TAG_HTMLESCAPE_TBL_ITEM(961, L"&rho;"), ///< rho, U+03C1 (961)
    TAG_HTMLESCAPE_TBL_ITEM(962, L"&sigmaf;"), ///< sigmaf, U+03C2 (962)
    TAG_HTMLESCAPE_TBL_ITEM(963, L"&sigma;"), ///< sigma, U+03C3 (963)
    TAG_HTMLESCAPE_TBL_ITEM(964, L"&tau;"), ///< tau, U+03C4 (964)
    TAG_HTMLESCAPE_TBL_ITEM(965, L"&upsilon;"), ///< upsilon, U+03C5 (965)
    TAG_HTMLESCAPE_TBL_ITEM(966, L"&phi;"), ///< phi, U+03C6 (966)
    TAG_HTMLESCAPE_TBL_ITEM(967, L"&chi;"), ///< chi, U+03C7 (967)
    TAG_HTMLESCAPE_TBL_ITEM(968, L"&psi;"), ///< psi, U+03C8 (968)
    TAG_HTMLESCAPE_TBL_ITEM(969, L"&omega;"), ///< omega, U+03C9 (969)
    TAG_HTMLESCAPE_TBL_ITEM(977, L"&thetasym;"), ///< thetasym, U+03D1 (977)
    TAG_HTMLESCAPE_TBL_ITEM(978, L"&upsih;"), ///< upsih, U+03D2 (978)
    TAG_HTMLESCAPE_TBL_ITEM(982, L"&piv;"), ///< piv, U+03D6 (982)

    TAG_HTMLESCAPE_TBL_ITEM(8194, L"&ensp;"), ///< ensp, U+2002 (8194)
    TAG_HTMLESCAPE_TBL_ITEM(8195, L"&emsp;"), ///< emsp, U+2003 (8195)
    TAG_HTMLESCAPE_TBL_ITEM(8201, L"&thinsp;"), ///< thinsp, U+2009 (8201)
    TAG_HTMLESCAPE_TBL_ITEM(8204, L"&zwnj;"), ///< zwnj, U+200C (8204)
    TAG_HTMLESCAPE_TBL_ITEM(8205, L"&zwj;"), ///< zwj, U+200D (8205)
    TAG_HTMLESCAPE_TBL_ITEM(8206, L"&lrm;"), ///< lrm, U+200E (8206)
    TAG_HTMLESCAPE_TBL_ITEM(8207, L"&rlm;"), ///< rlm, U+200F (8207)
    TAG_HTMLESCAPE_TBL_ITEM(8211, L"&ndash;"), ///< ndash, U+2013 (8211)
    TAG_HTMLESCAPE_TBL_ITEM(8212, L"&mdash;"), ///< mdash, U+2014 (8212)
    TAG_HTMLESCAPE_TBL_ITEM(8216, L"&lsquo;"), ///< lsquo, U+2018 (8216)
    TAG_HTMLESCAPE_TBL_ITEM(8217, L"&rsquo;"), ///< rsquo, U+2019 (8217)
    TAG_HTMLESCAPE_TBL_ITEM(8218, L"&sbquo;"), ///< sbquo, U+201A (8218)
    TAG_HTMLESCAPE_TBL_ITEM(8220, L"&ldquo;"), ///< ldquo, U+201C (8220)
    TAG_HTMLESCAPE_TBL_ITEM(8221, L"&rdquo;"), ///< rdquo, U+201D (8221)
    TAG_HTMLESCAPE_TBL_ITEM(8222, L"&bdquo;"), ///< bdquo, U+201E (8222)
    TAG_HTMLESCAPE_TBL_ITEM(8224, L"&dagger;"), ///< dagger, U+2020 (8224)
    TAG_HTMLESCAPE_TBL_ITEM(8225, L"&Dagger;"), ///< Dagger, U+2021 (8225)
    TAG_HTMLESCAPE_TBL_ITEM(8226, L"&bull;"), ///< bull, U+2022 (8226)
    TAG_HTMLESCAPE_TBL_ITEM(8230, L"&hellip;"), ///< hellip, U+2026 (8230)
    TAG_HTMLESCAPE_TBL_ITEM(8240, L"&permil;"), ///< permil, U+2030 (8240)
    TAG_HTMLESCAPE_TBL_ITEM(8242, L"&prime;"), ///< prime, U+2032 (8242)
    TAG_HTMLESCAPE_TBL_ITEM(8243, L"&Prime;"), ///< Prime, U+2033 (8243)
    TAG_HTMLESCAPE_TBL_ITEM(8249, L"&lsaquo;"), ///< lsaquo, U+2039 (8249)
    TAG_HTMLESCAPE_TBL_ITEM(8250, L"&rsaquo;"), ///< rsaquo, U+203A (8250)
    TAG_HTMLESCAPE_TBL_ITEM(8254, L"&oline;"), ///< oline, U+203E (8254)
    TAG_HTMLESCAPE_TBL_ITEM(8260, L"&frasl;"), ///< frasl, U+2044 (8260)

    TAG_HTMLESCAPE_TBL_ITEM(8364, L"&euro;"), ///< euro, U+20AC (8364)

    TAG_HTMLESCAPE_TBL_ITEM(8465, L"&image;"), ///< image, U+2111 (8465)
    TAG_HTMLESCAPE_TBL_ITEM(8472, L"&weierp;"), ///< weierp, U+2118 (8472)
    TAG_HTMLESCAPE_TBL_ITEM(8476, L"&real;"), ///< real, U+211C (8476)
    TAG_HTMLESCAPE_TBL_ITEM(8482, L"&trade;"), ///< trade, U+2122 (8482)

    TAG_HTMLESCAPE_TBL_ITEM(8501, L"&alefsym;"), ///< alefsym, U+2135 (8501)
    TAG_HTMLESCAPE_TBL_ITEM(8592, L"&larr;"), ///< larr, U+2190 (8592)
    TAG_HTMLESCAPE_TBL_ITEM(8593, L"&uarr;"), ///< uarr, U+2191 (8593)
    TAG_HTMLESCAPE_TBL_ITEM(8594, L"&rarr;"), ///< rarr, U+2192 (8594)
    TAG_HTMLESCAPE_TBL_ITEM(8595, L"&darr;"), ///< darr, U+2193 (8595)
    TAG_HTMLESCAPE_TBL_ITEM(8596, L"&harr;"), ///< harr, U+2194 (8596)

    TAG_HTMLESCAPE_TBL_ITEM(8629, L"&crarr;"), ///< crarr, U+21B5 (8629)
    TAG_HTMLESCAPE_TBL_ITEM(8656, L"&lArr;"), ///< lArr, U+21D0 (8656)
    TAG_HTMLESCAPE_TBL_ITEM(8657, L"&uArr;"), ///< uArr, U+21D1 (8657)
    TAG_HTMLESCAPE_TBL_ITEM(8658, L"&rArr;"), ///< rArr, U+21D2 (8658)
    TAG_HTMLESCAPE_TBL_ITEM(8659, L"&dArr;"), ///< dArr, U+21D3 (8659)
    TAG_HTMLESCAPE_TBL_ITEM(8660, L"&hArr;"), ///< hArr, U+21D4 (8660)

    TAG_HTMLESCAPE_TBL_ITEM(8704, L"&forall;"), ///< forall, U+2200 (8704)
    TAG_HTMLESCAPE_TBL_ITEM(8706, L"&part;"), ///< part, U+2202 (8706)
    TAG_HTMLESCAPE_TBL_ITEM(8707, L"&exist;"), ///< exist, U+2203 (8707)
    TAG_HTMLESCAPE_TBL_ITEM(8709, L"&empty;"), ///< empty, U+2205 (8709)
    TAG_HTMLESCAPE_TBL_ITEM(8711, L"&nabla;"), ///< nabla, U+2207 (8711)
    TAG_HTMLESCAPE_TBL_ITEM(8712, L"&isin;"), ///< isin, U+2208 (8712)
    TAG_HTMLESCAPE_TBL_ITEM(8713, L"&notin;"), ///< notin, U+2209 (8713)
    TAG_HTMLESCAPE_TBL_ITEM(8715, L"&ni;"), ///< ni, U+220B (8715)
    TAG_HTMLESCAPE_TBL_ITEM(8719, L"&prod;"), ///< prod, U+220F (8719)
    TAG_HTMLESCAPE_TBL_ITEM(8721, L"&sum;"), ///< sum, U+2211 (8721)
    TAG_HTMLESCAPE_TBL_ITEM(8722, L"&minus;"), ///< minus, U+2212 (8722)
    TAG_HTMLESCAPE_TBL_ITEM(8727, L"&lowast;"), ///< lowast, U+2217 (8727)
    TAG_HTMLESCAPE_TBL_ITEM(8730, L"&radic;"), ///< radic, U+221A (8730)
    TAG_HTMLESCAPE_TBL_ITEM(8733, L"&prop;"), ///< prop, U+221D (8733)
    TAG_HTMLESCAPE_TBL_ITEM(8734, L"&infin;"), ///< infin, U+221E (8734)
    TAG_HTMLESCAPE_TBL_ITEM(8736, L"&ang;"), ///< ang, U+2220 (8736)
    TAG_HTMLESCAPE_TBL_ITEM(8743, L"&and;"), ///< and, U+2227 (8743)
    TAG_HTMLESCAPE_TBL_ITEM(8744, L"&or;"), ///< or, U+2228 (8744)
    TAG_HTMLESCAPE_TBL_ITEM(8745, L"&cap;"), ///< cap, U+2229 (8745)
    TAG_HTMLESCAPE_TBL_ITEM(8746, L"&cup;"), ///< cup, U+222A (8746)
    TAG_HTMLESCAPE_TBL_ITEM(8747, L"&int;"), ///< int, U+222B (8747)
    TAG_HTMLESCAPE_TBL_ITEM(8756, L"&there4;"), ///< there4, U+2234 (8756)
    TAG_HTMLESCAPE_TBL_ITEM(8764, L"&sim;"), ///< sim, U+223C (8764)
    TAG_HTMLESCAPE_TBL_ITEM(8773, L"&cong;"), ///< cong, U+2245 (8773)
    TAG_HTMLESCAPE_TBL_ITEM(8776, L"&asymp;"), ///< asymp, U+2248 (8776)

    TAG_HTMLESCAPE_TBL_ITEM(8800, L"&ne;"), ///< ne, U+2260 (8800)
    TAG_HTMLESCAPE_TBL_ITEM(8801, L"&equiv;"), ///< equiv, U+2261 (8801)
    TAG_HTMLESCAPE_TBL_ITEM(8804, L"&le;"), ///< le, U+2264 (8804)
    TAG_HTMLESCAPE_TBL_ITEM(8805, L"&ge;"), ///< ge, U+2265 (8805)
    TAG_HTMLESCAPE_TBL_ITEM(8834, L"&sub;"), ///< sub, U+2282 (8834)
    TAG_HTMLESCAPE_TBL_ITEM(8835, L"&sup;"), ///< sup, U+2283 (8835)
    TAG_HTMLESCAPE_TBL_ITEM(8836, L"&nsub;"), ///< nsub, U+2284 (8836)
    TAG_HTMLESCAPE_TBL_ITEM(8838, L"&sube;"), ///< sube, U+2286 (8838)
    TAG_HTMLESCAPE_TBL_ITEM(8839, L"&supe;"), ///< supe, U+2287 (8839)
    TAG_HTMLESCAPE_TBL_ITEM(8853, L"&oplus;"), ///< oplus, U+2295 (8853)
    TAG_HTMLESCAPE_TBL_ITEM(8855, L"&otimes;"), ///< otimes, U+2297 (8855)
    TAG_HTMLESCAPE_TBL_ITEM(8869, L"&perp;"), ///< perp, U+22A5 (8869)

    TAG_HTMLESCAPE_TBL_ITEM(8901, L"&sdot;"), ///< sdot, U+22C5 (8901)
    TAG_HTMLESCAPE_TBL_ITEM(8968, L"&lceil;"), ///< lceil, U+2308 (8968)
    TAG_HTMLESCAPE_TBL_ITEM(8969, L"&rceil;"), ///< rceil, U+2309 (8969)
    TAG_HTMLESCAPE_TBL_ITEM(8970, L"&lfloor;"), ///< lfloor, U+230A (8970)
    TAG_HTMLESCAPE_TBL_ITEM(8971, L"&rfloor;"), ///< rfloor, U+230B (8971)

    TAG_HTMLESCAPE_TBL_ITEM(9001, L"&lang;"), ///< lang, U+2329 (9001)
    TAG_HTMLESCAPE_TBL_ITEM(9002, L"&rang;"), ///< rang, U+232A (9002)

    TAG_HTMLESCAPE_TBL_ITEM(9674, L"&loz;"), ///< loz, U+25CA (9674)

    TAG_HTMLESCAPE_TBL_ITEM(9824, L"&spades;"), ///< spades, U+2660 (9824)
    TAG_HTMLESCAPE_TBL_ITEM(9827, L"&clubs;"), ///< clubs, U+2663 (9827)
    TAG_HTMLESCAPE_TBL_ITEM(9829, L"&hearts;"), ///< hearts, U+2665 (9829)
    TAG_HTMLESCAPE_TBL_ITEM(9830, L"&diams;"), ///< diams, U+2666 (9830)
};

// 9830 is the max value of html escape character
// we will take the value as the position index on g_pArr_tag_htmlescape_tbl_item
#define MAX_VALUE_HTML_ESCAPE_CHAR (9830 + 1)

namespace ns_base
{
    void init_string_convert_escape_character()
    {
        int iIndex = 0;
        int iPos = 0;

        if (NULL == g_pArr_tag_htmlescape_tbl_item)
        {
            g_pArr_tag_htmlescape_tbl_item = new TAG_HTMLESCAPE_TBL_ITEM[MAX_VALUE_HTML_ESCAPE_CHAR];

            for (iIndex = 0; iIndex < g_iSize_HtmlEscapeTbl; iIndex++)
            {
                iPos = g_HtmlEscapeTbl[iIndex].cValue;

                g_pArr_tag_htmlescape_tbl_item[iPos].bValidData = TRUE;
                g_pArr_tag_htmlescape_tbl_item[iPos].cValue = g_HtmlEscapeTbl[iIndex].cValue;
                g_pArr_tag_htmlescape_tbl_item[iPos].strHtmlEscape = g_HtmlEscapeTbl[iIndex].strHtmlEscape.c_str();
            }
        }
    }

    void uninit_string_convert_escape_character()
    {
        SAFE_DELETE_ARRAY(g_pArr_tag_htmlescape_tbl_item);
    }

    std::string string_convert_escape_character_a(IN std::string strIn)
    {
        std::wstring        strTemp = L"";
        std::string         strOutA = "";

        strTemp = string_convert_escape_character_w(ns_base::A2Wex(strIn.c_str()));
        strOutA = ns_base::W2Aex(strTemp.c_str());
        return strOutA;
    }

    std::wstring string_convert_escape_character_w(IN std::wstring strIn)
    {
        std::wstring         strTemp = L"";

        string2HtmlEscape(strIn, strTemp);
        return strTemp;
    }

    BOOL string2HtmlEscape(IN const std::wstring& strIn, OUT std::wstring& strHtmlEscape)
    {
        size_t      nIndex = 0;
        WCHAR       cCur = L'\0';
        size_t      nLenInW = _tcslen(strIn.c_str());
        std::wstring        strTemp = L"";

        strHtmlEscape = L"";
        for (nIndex = 0; nIndex < nLenInW; nIndex++)
        {
            cCur = strIn[nIndex];
            char2HtmlEscape(cCur, strTemp);
            strHtmlEscape += strTemp.c_str();
        }

        return TRUE;
    }

    BOOL char2HtmlEscape(IN WCHAR cIn, OUT std::wstring& strHtmlEscape)
    {
        if (IsHtmlEscape(cIn))
        {
            strHtmlEscape = g_pArr_tag_htmlescape_tbl_item[cIn].strHtmlEscape.c_str();
        }
        else
        {
            strHtmlEscape = cIn;
        }

        return TRUE;
    }

    BOOL IsHtmlEscape(IN WCHAR cIn)
    {
        BOOL        bRc = FALSE;

        do 
        {
            if ((cIn >= MAX_VALUE_HTML_ESCAPE_CHAR)
                || (!g_pArr_tag_htmlescape_tbl_item[cIn].bValidData))
            {
                break;
            }

            bRc = TRUE;
        } while (0);

        return bRc;
    }
}




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值