在使用 Html Editor 时，有一个常用的操作，就是将 Word 中编辑好的内容直接复制到 Html Editor 中。但这样会产生很多冗余代码，而很多网站的 Html 编辑器都有字数限制，冗余代码会导致发布的文章字数超过限制。使用以下函数，可以去除冗余代码，并且尽可能地保留格式（字体信息会丢失）：
/**
 * Removes Microsoft Word clutter from an HTML string while keeping the
 * basic structure of the content. Font information is lost because span
 * tags and inline styles are dropped.
 *
 * @param {string} html - Markup as pasted from Word.
 * @returns {string} The cleaned markup.
 */
function cleanWordMarkup(html) {
  var text = html;

  // Drop every opening/closing span tag. Because all spans go away here,
  // no separate "empty span" or "nested span" clean-up is needed afterwards.
  text = text.replace(/<\/?span[^>]*>/gi, "");

  // Strip class, style and lang attributes from any tag. The class and lang
  // values may be unquoted (Word's own output) or double-quoted (what
  // innerHTML serializes), so both forms are accepted.
  text = text.replace(/<(\w[^>]*) class=(?:"[^"]*"|[^ |>]*)([^>]*)/gi, "<$1$2");
  text = text.replace(/<(\w[^>]*) style="[^"]*"([^>]*)/gi, "<$1$2");
  text = text.replace(/<(\w[^>]*) lang=(?:"[^"]*"|[^ |>]*)([^>]*)/gi, "<$1$2");

  // Remove XML processing instructions such as <?xml:namespace ... />.
  text = text.replace(/<\\?\?xml[^>]*>/gi, "");

  // Remove namespaced tags such as <o:p> and </o:p>.
  text = text.replace(/<\/?\w+:[^>]*>/gi, "");

  // Turn every non-breaking space (entity or raw U+00A0) into a plain space.
  text = text.replace(/&nbsp;|\u00a0/g, " ");

  // Convert paragraphs into divs, keeping any remaining attributes.
  // The lookahead stops "<p" from matching tags like <pre>, and [\s\S]
  // lets a paragraph span several lines.
  text = text.replace(/<p(?=[\s>])([^>]*)>([\s\S]*?)<\/p>/gi, "<div$1>$2</div>");

  return text;
}

/**
 * Cleans the Word-generated markup inside the element whose id is "oDiv"
 * and writes the result back into that element. Does nothing when the
 * element is not present in the document.
 */
function MSWordClean() {
  var target = document.getElementById('oDiv');
  if (!target) {
    return;
  }
  target.innerHTML = cleanWordMarkup(target.innerHTML);
}
下面是效果演示，可以粘贴一段 Word 文档内容试试。