在html文档中创建段落应使用,我可以在Internet Explorer中将整个HTML文档加载到文档片段中吗？...-CSDN博客

演示(Fiddle)：http://jsfiddle.net/JFSKe/6/

DocumentFragment 没有实现 DOM 方法。将 document.createElement 与 innerHTML 结合使用会移除 <html>、<head> 和 <body> 标签(即使所创建的元素本身就是根元素 <html> 也是如此)。因此，应该在其他地方寻求解决方案。我创建了一个跨浏览器的“字符串转 DOM”函数，该函数利用了一个不可见的内嵌框架(iframe)。

所有外部资源和脚本将被禁用。有关更多信息，请参见代码说明。

代码

/* @param String html The string with HTML which has to be converted to a DOM object

@param func callback (optional) Callback(HTMLDocument doc, function destroy)

@returns undefined if callback exists, else: Object

HTMLDocument doc DOM fetched from Parameter:html

function destroy Removes HTMLDocument doc. */

/**
 * Converts an HTML string into a DOM document by writing it into a hidden
 * iframe that is appended to the current page.
 *
 * The string is passed through sanitiseHTML() before it is written.
 *
 * @param {string} html  HTML source to convert.
 * @param {function(Document, function())} [callback]  When given, it is
 *        invoked as callback(doc, destroy) and string2dom returns undefined.
 * @returns {{doc: Document, destroy: function()}|undefined}  When no
 *        callback is given: the generated document and a function that
 *        removes the iframe again.
 */
function string2dom(html, callback){
    /* Sanitise the string */
    html = sanitiseHTML(html); /* Defined elsewhere in this file */

    /* Create an invisible IFrame and attach it to the page before its
       document is accessed. */
    var iframe = document.createElement("iframe");
    iframe.style.display = "none";
    document.body.appendChild(iframe);

    /* contentDocument where available, contentWindow.document otherwise */
    var doc = iframe.contentDocument || iframe.contentWindow.document;

    /* open()/close() around write() so the frame holds only the given HTML */
    doc.open();
    doc.write(html);
    doc.close();

    function destroy(){
        /* Once removed, the iframe has no parentNode any more; checking it
           makes repeated destroy() calls a harmless no-op instead of a
           TypeError. */
        var parent = iframe.parentNode;
        if(parent) parent.removeChild(iframe);
    }

    if(callback) callback(doc, destroy);
    else return {"doc": doc, "destroy": destroy};
}

/* @name sanitiseHTML

@param String html A string representing HTML code

@return String A new string, fully stripped of external resources.

All "external" attributes (href, src) are prefixed by data- */

function sanitiseHTML(html){

/* Adds a <!--"'--> comment before every matched tag, so that unterminated
   quotes aren't preventing the browser from splitting a tag.
   Test case: (the original test-case string was lost when this page was
   extracted).
   NOTE(review): the prefix literal had been stripped to "" by the same
   extraction. It is restored as an HTML comment containing both quote
   characters, which is what this comment and the final prefix
   de-duplication in this function imply - confirm the exact text against
   the upstream source. */
var prefix = "<!--\"'-->";

/*Attributes should not be prefixed by these characters. This list is not
  complete, but will be sufficient for this function.
  (see http://www.w3.org/TR/REC-xml/#NT-NameChar) */
var att = "[^-a-z0-9:._]";

/* Start of an opening tag.
   NOTE(review): this literal was truncated right after its opening quote;
   "<[a-z]" is a reconstruction - confirm. */
var tag = "<[a-z]";

/* Remainder of a tag: any run without < or >, stepping over complete
   double- or single-quoted strings (which may themselves contain < or >). */
var any = "(?:[^<>\"']*(?:\"[^\"]*\"|'[^']*'))*?[^<>]*";

/* End of a tag: the closing ">", or - for a tag that is never closed - the
   position right before the next "<".
   NOTE(review): the literal was truncated after "(?:>|(?="; the tail "<))"
   is a reconstruction - confirm. */
var etag = "(?:>|(?=<))";

/*
 @name ae
 @description          Converts a given string in a sequence of the
                       original input and the HTML entity
 @param String string  String to convert
 */

/* End of a numeric character reference: a semicolon or, when the semicolon
   is left out, any position that is not followed by another digit. */
var entityEnd = "(?:;|(?!\\d))";

/* Pre-built patterns for a few special characters; ae() also caches its
   results in this object.
   NOTE(review): the "&nbsp;", "&#0*" and "&#x0*" prefixes had been decoded
   away when this page was extracted, leaving alternatives such as "|*32"
   (not a valid RegExp). They are restored here from the decimal/hexadecimal
   code pairs that survived (32/20, 40/28, 41/29, 46/2e). */
var ents = {" ":"(?:\\s|&nbsp;?|&#0*32"+entityEnd+"|&#x0*20"+entityEnd+")",
            "(":"(?:\\(|&#0*40"+entityEnd+"|&#x0*28"+entityEnd+")",
            ")":"(?:\\)|&#0*41"+entityEnd+"|&#x0*29"+entityEnd+")",
            ".":"(?:\\.|&#0*46"+entityEnd+"|&#x0*2e"+entityEnd+")"};

/*Placeholder to avoid tricky filter-circumventing methods*/

/* Cache of the per-character patterns built by ae(), keyed by the
   lower-case character. */
var charMap = {};

var s = ents[" "]+"*"; /* Short-hand space */

/* Important: Must be pre- and postfixed by < and >. RE matches a whole tag! */

/* Builds a RegExp source fragment that matches `string` written literally
   (either case) or with any of its characters written as a decimal or
   hexadecimal numeric character reference. Results are cached in `ents`. */
function ae(string){
    /* Own-property lookup, so that names inherited from Object.prototype
       (e.g. "constructor") are not mistaken for cached patterns. */
    if(Object.prototype.hasOwnProperty.call(ents, string)) return ents[string];

    var all_chars_lowercase = string.toLowerCase();
    var all_chars_uppercase = string.toUpperCase();
    var RE_res = "";

    for(var i=0; i<string.length; i++){
        var char_lowercase = all_chars_lowercase.charAt(i);
        if(charMap[char_lowercase]){
            RE_res += charMap[char_lowercase];
            continue;
        }
        var char_uppercase = all_chars_uppercase.charAt(i);

        /* Literal alternative; RegExp metacharacters are escaped so that
           input such as "." or "(" cannot alter or break the pattern. */
        var RE_sub = [char_lowercase.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&")];

        /* Decimal and hexadecimal references, leading zeros allowed */
        RE_sub.push("&#0*" + char_lowercase.charCodeAt(0) + entityEnd);
        RE_sub.push("&#x0*" + char_lowercase.charCodeAt(0).toString(16) + entityEnd);
        if(char_lowercase != char_uppercase){
            /* The upper-case letter has its own character code */
            RE_sub.push("&#0*" + char_uppercase.charCodeAt(0) + entityEnd);
            RE_sub.push("&#x0*" + char_uppercase.charCodeAt(0).toString(16) + entityEnd);
        }
        RE_sub = "(?:" + RE_sub.join("|") + ")";
        RE_res += (charMap[char_lowercase] = RE_sub);
    }
    return(ents[string] = RE_res);
}

/*
 @name by
 @description  Second argument for the replace function.
 */

/* Replacement callback for String.prototype.replace: keeps the first
   captured group as it is and puts "data-" in front of the second one. */
function by(match, group1, group2){
    /* The second group holds the external pointer; renaming it is what
       turns e.g. href into data-href. */
    var renamed = "data-" + group2;
    return group1 + renamed;
}

/*
 @name cr
 @description             Selects a HTML element and performs a
                          search-and-replace on attributes
 @param String selector   HTML substring to match
 @param String attribute  RegExp-escaped; HTML element attribute to match
 @param String marker     Optional RegExp-escaped; marks the prefix
 @param String delimiter  Optional RegExp-escaped; non-quote delimiters
 @param String end        Optional RegExp-escaped; forces the match to
                          end before an occurrence of <end> when
                          quotes are missing
 */

function cr(selector, attribute, marker, delimiter, end){

if(typeof selector == "string") selector = new RegExp(selector, "gi");

marker = typeof marker == "string" ? marker : "\\s*=";

delimiter = typeof delimiter == "string" ? delimiter : "";

end = typeof end == "string" ? end : "";

var is_end = end && "?";

var re1 = new RegExp("("+att+")("+attribute+marker+"(?:\\s*\"[^\""+delimiter+"]*\"|\\s*'[^'"+delimiter+"]*'|[^\\s"+delimiter+"]+"+is_end+")"+end+")", "gi");

html = html.replace(selector, function(match){

return prefix + match.replace(re1, by);

});

}

/*
 @name cri
 @description             Selects an attribute of a HTML element, and
                          performs a search-and-replace on certain values
 @param String selector   HTML element to match
 @param String attribute  RegExp-escaped; HTML element attribute to match
 @param String front      RegExp-escaped; attribute value, prefix to match
 @param String flags      Optional RegExp flags, default "gi"
 @param String delimiter  Optional RegExp-escaped; non-quote delimiters
 @param String end        Optional RegExp-escaped; forces the match to
                          end before an occurrence of <end> when
                          quotes are missing
 */

/* Finds every piece of `html` matched by `selector`, locates `attribute`
   inside it and passes those parts of the attribute value that start with
   `front` to by(). Each processed match is additionally preceded by
   `prefix`. Reads att, prefix and by from the enclosing scope and reassigns
   html.

   selector   String (compiled with flags "gi") or RegExp
   attribute  RegExp source of the attribute name
   front      RegExp source the value part has to start with
   flags      RegExp flags for the value patterns; anything that is not a
              string means "gi"
   delimiter  RegExp source; an unquoted value part stops in front of it
   end        Optional RegExp source appended to the unquoted value pattern */
function cri(selector, attribute, front, flags, delimiter, end){
    var tagPattern = selector;
    if(typeof tagPattern == "string") tagPattern = new RegExp(tagPattern, "gi");
    if(typeof flags != "string") flags = "gi";

    /* group 1: boundary character, attribute name and "=", group 2: value */
    var valuePart = "((?:\\s*\"[^\"]*\"|\\s*'[^']*'|[^\\s>]+))";
    var attrPattern = new RegExp("(" + att + attribute + "\\s*=)" + valuePart, "gi");

    var closing = ")";
    if(typeof end == "string") closing = end + closing;

    /* One pattern per kind of value: a quote directly followed by `front`,
       or `front` on its own for everything else */
    var quoted = {};
    quoted['"'] = new RegExp('(")(' + front + '[^"]+")', flags);
    quoted["'"] = new RegExp("(')(" + front + "[^']+')", flags);
    var bare = new RegExp(
        "()(" + front + '(?:"[^"]+"|\'[^\']+\'|(?:(?!' + delimiter + ').)+)' + closing,
        flags);

    function rewriteValue(whole, name, value){
        var first = value.charAt(0);
        var pattern = (first == '"' || first == "'") ? quoted[first] : bare;
        return name + value.replace(pattern, by);
    }

    html = html.replace(tagPattern, function(wholeMatch){
        return prefix + wholeMatch.replace(attrPattern, rewriteValue);
    });
}

/* */

html = html.replace(new RegExp("");

/* Stripping all scripts */

html = html.replace(new RegExp("]*>", "gi"), "");

html = html.replace(/

cr(tag+any+att+"on[-a-z0-9:_.]+="+any+etag, "on[-a-z0-9:_.]+"); /* Event listeners */

cr(tag+any+att+"href\\s*="+any+etag, "href"); /* Linked elements */

cr(tag+any+att+"src\\s*="+any+etag, "src"); /* Embedded elements */

cr(" */

/* */

cr("

cr(/

cri(tag+any+att+"style\\s*="+any+etag, "style", ae("url")+s+ae("(")+s, 0, s+ae(")"), ae(")"));

/* IE7- CSS expression() */

cr(/

cri(tag+any+att+"style\\s*="+any+etag, "style", ae("expression")+s+ae("(")+s, 0, s+ae(")"), ae(")"));

return html.replace(new RegExp("(?:"+prefix+")+", "g"), prefix);

}

代码说明

sanitiseHTML 函数基于我的 replace_all_rel_by_abs 函数(请参阅此答案)。不过，sanitiseHTML 函数已被完全重写，以实现最大的效率和可靠性。

此外，还添加了一组新的正则表达式，用于移除所有脚本和事件处理程序(包括 IE7 及更早版本支持的 CSS expression())。为确保所有标签均按预期进行解析，经过调整的标签都会加上前缀 <!--"'-->。要正确解析嵌套的“事件处理程序”以及未终止的引号，必须使用该前缀，例如：<a id="><input onclick="<div onmousemove=evil()>">。

这些正则表达式是通过内部函数 cr / cri(Create Replace [Inline])动态创建的。这些函数接受一组参数，并创建和执行高级的正则替换。为了确保 HTML 实体无法绕过正则表达式(<meta http-equiv=refresh> 中的 refresh 可以有多种写法)，动态创建的正则表达式中有一部分由函数 ae(Any Entity)构造。

实际的替换由函数 by(replace by，即“替换为”)完成。在此实现中，by 会在所有匹配到的属性之前添加 data-。

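所有 <script>//<![CDATA[ … //]]></script> 形式的脚本都会被剥离。此步骤是必需的，因为 CDATA 节允许代码内部包含 </script> 字符串。执行此替换后，可以安全地进行下一个替换：

其余的 <script>…</script> 标签将被删除。

<meta http-equiv=refresh> 标记将被删除。

所有的事件侦听器和外部指针/属性(href、src、url())都会加上前缀 data-，如前文所述。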

创建一个 IFrame 对象。IFrame 不太可能泄漏内存(与 htmlfile ActiveXObject 相反)。该 IFrame 被设置为不可见，并附加到文档中，以便可以访问其 DOM。document.write() 用于将 HTML 写入 IFrame。document.open() 和 document.close() 用于清空文档先前的内容，使生成的文档成为给定 html 字符串的精确副本。

如果指定了回调函数，则会以两个参数调用该函数。第一个参数是对所生成的 document 对象的引用。第二个参数是一个函数，调用它会销毁所生成的 DOM 树。当您不再需要这棵树时，应调用此函数。

如果未指定回调函数，则该函数将返回一个由两个属性(doc和destroy)组成的对象，这些属性的行为与前面提到的参数相同。

补充笔记

将 designMode 属性设置为 "On" 会阻止框架执行脚本(Chrome 不支持)。如果出于特定原因必须保留 <script> 标签，可以使用 iframe.designMode = "On" 来代替剥离脚本的功能。

我找不到关于 htmlfile ActiveXObject 的可靠资料。根据这个来源，htmlfile 比 IFrame 慢，并且更容易发生内存泄漏。

所有受影响的属性(href、src……)都加上了前缀 data-。下面以 data-href 为例，说明如何读取/修改这些属性：

elem.getAttribute("data-href")和elem.setAttribute("data-href", "...")

elem.dataset.href和elem.dataset.href = "..."。

外部资源已被禁用。因此，页面看起来可能完全不同：没有外部样式；没有由脚本生成的样式；没有图像，元素的大小可能完全不同。

例子

sanitiseHTML(html)

将此小书签(bookmarklet)粘贴到地址栏中。它会提供一个选项，用来插入一个文本区域，以显示经过清理的 HTML 字符串。

javascript:void(function(){var s=document.createElement("script");s.src="http://rob.lekensteyn.nl/html-sanitizer.js";document.body.appendChild(s)})();

代码示例-string2dom(html)：

/* Callback form: the document and the clean-up function are passed to the
   callback.
   NOTE(review): the HTML string literals in both examples were truncated
   when this page was extracted. They are restored from what survived
   (the text "Test" and the two "Alert:" comments) - confirm. */
string2dom("<html><head><title>Test</title></head></html>", function(doc, destroy){
    alert(doc.title); /* Alert: "Test" */
    destroy();
});

/* Return-value form: without a callback, an object with the properties
   doc and destroy is returned. */
var test = string2dom("<div id='secret'></div>");
alert(test.doc.getElementById("secret").tagName); /* Alert: "DIV" */
test.destroy();