从 Word 中复制内容到编辑器时,需要替换冗余的 HTML 代码、清理 HTML 格式,这是最常见的问题之一,下面给出其中一种解决方法。
js函数如下:
/**
 * Removes Microsoft Word clutter from HTML that was pasted into a rich-text
 * editor: Office-specific tags, `mso-*` styles, `class`/`style`/`lang`/`face`
 * attributes, empty wrappers, and converts headings and paragraphs into
 * plain `<div>` based markup.
 *
 * The rules are applied strictly in order; later rules rely on the clean-up
 * done by earlier ones (e.g. attributes are stripped before empty
 * `<span>`/`<font>` wrappers are unwrapped).
 *
 * NOTE(review): the published listing had lost every HTML tag that appeared
 * inside a pattern or replacement string, used typographic quotes and called
 * a non-JavaScript helper. The tag fragments below were reconstructed from
 * the surviving pattern residue -- confirm against the upstream routine
 * before relying on edge cases.
 *
 * @param {string} html - HTML fragment to clean.
 * @returns {string} The cleaned HTML; an empty string for null/undefined input.
 */
function cWord(html)
{
  if (html == null) {
    return '';
  }

  let out = String(html);

  // Office paragraph markers: drop empty ones, keep a space for filled ones.
  out = out.replace(/<o:p>\s*<\/o:p>/gi, '');
  out = out.replace(/<o:p>.*?<\/o:p>/gi, '&nbsp;');

  // Word-only inline style declarations.
  out = out.replace(/\s*mso-[^:]+:[^;"]+;?/gi, '');
  out = out.replace(/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '');
  out = out.replace(/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, '"');
  out = out.replace(/\s*TEXT-INDENT: 0cm\s*;/gi, '');
  out = out.replace(/\s*TEXT-INDENT: 0cm\s*"/gi, '"');
  out = out.replace(/\s*TEXT-ALIGN: [^\s;]+;?"/gi, '"');
  out = out.replace(/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, '"');
  out = out.replace(/\s*FONT-VARIANT: [^\s;]+;?"/gi, '"');
  out = out.replace(/\s*tab-stops:[^;"]*;?/gi, '');
  out = out.replace(/\s*tab-stops:[^"]*/gi, '');

  // Font face information, both as attribute and as style declaration.
  out = out.replace(/\s*face="[^"]*"/gi, '');
  out = out.replace(/\s*face=[^ >]*/gi, '');
  out = out.replace(/\s*FONT-FAMILY:[^;"]*;?/gi, '');

  // Strip class and style attributes (first occurrence per tag), then any
  // style attribute that ended up empty.
  out = out.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, '<$1$3');
  out = out.replace(/<(\w[^>]*) style="([^"]*)"([^>]*)/gi, '<$1$3');
  out = out.replace(/\s*style="\s*"/gi, '');

  // Spans that only carry a non-breaking space, or nothing at all.
  out = out.replace(/<SPAN\s*[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, '&nbsp;');
  out = out.replace(/<SPAN\s*[^>]*><\/SPAN>/gi, '');

  out = out.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, '<$1$3');

  // Unwrap attribute-less <span>/<font> wrappers, keeping their content.
  out = out.replace(/<SPAN\s*>(.*?)<\/SPAN>/gi, '$1');
  out = out.replace(/<FONT\s*>(.*?)<\/FONT>/gi, '$1');

  // XML declarations and namespaced tags such as <o:p>, <w:...>, <st1:...>.
  out = out.replace(/<\\?\?xml[^>]*>/gi, '');
  out = out.replace(/<\/?\w+:[^>]*>/gi, '');

  // Headings: remove empty ones, turn the rest into bold <font> blocks.
  out = out.replace(/<H\d>\s*<\/H\d>/gi, '');
  out = out.replace(/<H1([^>]*)>/gi, '<div$1><b><font size="6">');
  out = out.replace(/<H2([^>]*)>/gi, '<div$1><b><font size="5">');
  out = out.replace(/<H3([^>]*)>/gi, '<div$1><b><font size="4">');
  out = out.replace(/<H4([^>]*)>/gi, '<div$1><b><font size="3">');
  out = out.replace(/<H5([^>]*)>/gi, '<div$1><b><font size="2">');
  out = out.replace(/<H6([^>]*)>/gi, '<div$1><b><font size="1">');
  out = out.replace(/<\/H\d>/gi, '</font></b></div>');

  out = out.replace(/<(U|I|STRIKE)>&nbsp;<\/\1>/gi, '&nbsp;');

  // Remove empty elements. Run three times because removing an inner empty
  // element can leave its parent empty (e.g. <p><b></b></p>).
  out = out.replace(/<([^\s>]+)[^>]*>\s*<\/\1>/gi, '');
  out = out.replace(/<([^\s>]+)[^>]*>\s*<\/\1>/gi, '');
  out = out.replace(/<([^\s>]+)[^>]*>\s*<\/\1>/gi, '');

  // Paragraphs become <div> blocks.
  // NOTE(review): "<P" also matches the start of tags like <pre>/<param>;
  // kept as in the source listing -- tighten if such tags can appear.
  out = out.replace(/(<P)([^>]*>.*?)(<\/P>)/gi, '<div$2</div>');

  return out;
}
该方法简洁,可根据自己需要进行增删。