正如之前已经多次提到的,HTML 不是正则语言,因此无法用正则表达式对其进行解析。
您必须以递归的方式来完成这项操作;我建议遍历 DOM 树。
可以试试类似下面这样的代码:
/**
 * Recursively walks the node tree rooted at `curr_element` and normalizes the
 * spacing around commas and periods in every leaf text node, in place.
 *
 * For each "," or "." that is followed (after optional spaces) by a character
 * that is neither a space nor a digit, the surrounding spaces are collapsed so
 * that the punctuation is followed by exactly one space. A period followed
 * directly by a digit (e.g. "1.5") is left untouched.
 *
 * Only the `data` of text nodes is modified; element structure is unchanged.
 *
 * @param {Node|null|undefined} curr_element - Root of the subtree to process.
 *   A null/undefined value (e.g. the result of a failed
 *   `document.getElementById` lookup) is ignored.
 * @returns {void}
 */
function regexReplaceInnerText(curr_element) {
    // Guard: a failed DOM lookup yields null; treat it as "nothing to do"
    // instead of throwing on the property access below.
    if (curr_element === null || curr_element === undefined) {
        return;
    }

    var children = curr_element.childNodes;

    if (children.length > 0) {
        // Recursive case: not a leaf node, so visit every child in order.
        for (var i = 0; i < children.length; i++) {
            regexReplaceInnerText(children[i]);
        }
        return;
    }

    // Termination case: no children, so this is a leaf node.
    // Only text nodes are of interest; an empty element such as <br> is skipped.
    var isTextNode = curr_element.nodeName === "#text" || curr_element.nodeType === 3;
    if (!isTextNode) {
        return;
    }

    // Skip nodes that contain nothing but white space (optional optimisation).
    if (!/\S/.test(curr_element.data)) {
        return;
    }

    curr_element.data = curr_element.data.replace(/ *(,|\.) *([^ 0-9])/g, '$1 $2');
}
// Usage: pick the element whose descendant text nodes should be rewritten
// and hand it to regexReplaceInnerText. Wrapped in an IIFE so the helper
// variables do not leak into the global scope.
(function () {
    // Process the whole document body...
    var bodyElement = document.getElementsByTagName("body")[0];
    regexReplaceInnerText(bodyElement);

    // ...or, to limit the rewrite to part of the document, a single element.
    var scopedElement = document.getElementById("ElementToRegEx");
    regexReplaceInnerText(scopedElement);
})();