众所周知,当直接将word中的内容复制到网页上时,会产生很多冗余代码;
而现在,在线编辑器又很普遍;就包括,现在用的百度空间的这个文本编辑器,如果直接从WORD中写好的文章复制进来,本来没有几个字,结果,它会提示,超出最大字数;也就是因为冗余代码过多的原因;
而用户直接拷贝Word的事儿很常见;以前我也写过一个清理的方法;在我的空间也有;当时只是为了让文章在显示时,能够统一格式;清理了一些多余样式,然后用CSS控制其样式,如果控制不了的,就用!important强制定义;呵呵;
虽然清理了样式,但也遗留了不少,例如,<font style="color:red">示例</font>;清理后成了<font>示例</font>;其实这时font标签已经没有用了,不如直接清除掉;
于是就又写了一些方法;把几个常用的简单方法发上来吧,很简单;
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.Text.RegularExpressions;
- namespace Extend
- {
- public class Article
- {
- #region 清理HTML标签
- /// <summary>
- /// 清理HTML标签的多余样式;如<div style="color:#454353">示例</div>;换成<div>示例</div>
- /// </summary>
- /// <param name="str">原始文本</param>
- /// <param name="element">要清除的标签</param>
- /// <returns></returns>
- public static string ClearElement(string str, string element)
- {
- string old = @"<" + element + "[^>]+>";
- string rep = "<" + element + ">";
- str = Regex.Replace(str, old, rep, RegexOptions.IgnoreCase);
- return str;
- }
- /// <summary>
- /// 清除HTML标签;如<div style="color:#454353">示例</div>;换成:示例
- /// </summary>
- /// <param name="str">原始文本</param>
- /// <param name="element">要清除的标签</param>
- /// <returns></returns>
- public static string ReMoveElement(string str,string element)
- {
- string regFront = @"<" + element + "[^>]*>";
- string regAfter = "</" + element + ">";
- str = Regex.Replace(str, regFront, "", RegexOptions.IgnoreCase);
- str = Regex.Replace(str, regAfter, "", RegexOptions.IgnoreCase);
- return str;
- }
- /// <summary>
- /// 清理指定字符串,大小写不敏感
- /// </summary>
- /// <param name="strText">原始文本</param>
- /// <param name="strOld">要替换的字符串,支持正则表达式,大小写不敏感</param>
- /// <param name="strNew">替换后的字符串</param>
- /// <returns></returns>
- public static string RegexReplace(string strText,string strOld,string strNew)
- {
- strText = Regex.Replace(strText, strOld, strNew, RegexOptions.IgnoreCase);
- return strText;
- }
- /// <summary>
- /// 清理Word的样式,主要是一些带冒号的标签,如o:p
- /// </summary>
- /// <param name="strText"></param>
- /// <returns></returns>
- public static string ClearWordStyle(string strText)
- {
- string regFront = @"<\w+:[^>]*>";
- string regAfter = @"</\w+:[^>]*>";
- strText = Regex.Replace(strText, regFront, "", RegexOptions.IgnoreCase);
- strText = Regex.Replace(strText, regAfter, "", RegexOptions.IgnoreCase);
- return strText;
- }
- #endregion
- }
- }
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace Extend
{
    /// <summary>
    /// Regex-based helpers for tidying HTML fragments, e.g. markup pasted from Microsoft Word.
    /// All matching is case-insensitive. These helpers work on text, not on a parsed DOM,
    /// so a literal ">" inside an attribute value will end a tag match early.
    /// </summary>
    public class Article
    {
        #region HTML tag cleanup

        /// <summary>
        /// Strips every attribute from the start tags of one element while keeping the element,
        /// e.g. <c>&lt;div style="color:#454353"&gt;sample&lt;/div&gt;</c> becomes
        /// <c>&lt;div&gt;sample&lt;/div&gt;</c>. Matched start tags are rewritten using the
        /// spelling of <paramref name="element"/> as passed in.
        /// </summary>
        /// <param name="str">Source HTML. Null or empty input is returned unchanged.</param>
        /// <param name="element">Literal tag name (not a regex), e.g. "div" or "o:p".</param>
        /// <returns>The HTML with the element's start tags reduced to a bare tag.</returns>
        /// <exception cref="ArgumentException"><paramref name="element"/> is null or empty.</exception>
        public static string ClearElement(string str, string element)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            string startTag = BuildStartTagPattern(element);

            // "$" is a substitution marker in Regex.Replace replacement strings; double it so the
            // tag name is always emitted literally.
            string bareTag = "<" + element.Replace("$", "$$") + ">";

            return Regex.Replace(str, startTag, bareTag, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Removes the start and end tags of one element but keeps whatever was between them,
        /// e.g. <c>&lt;div style="color:#454353"&gt;sample&lt;/div&gt;</c> becomes <c>sample</c>.
        /// </summary>
        /// <param name="str">Source HTML. Null or empty input is returned unchanged.</param>
        /// <param name="element">Literal tag name (not a regex), e.g. "span" or "font".</param>
        /// <returns>The HTML with the element's tags removed.</returns>
        /// <exception cref="ArgumentException"><paramref name="element"/> is null or empty.</exception>
        public static string ReMoveElement(string str, string element)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            string startTag = BuildStartTagPattern(element);

            // Whitespace is allowed before the closing ">" of an end tag ("</b >").
            string endTag = "</" + Regex.Escape(element) + @"\s*>";

            str = Regex.Replace(str, startTag, string.Empty, RegexOptions.IgnoreCase);
            return Regex.Replace(str, endTag, string.Empty, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Case-insensitive regular-expression replace.
        /// </summary>
        /// <param name="strText">Source text. Null or empty input is returned unchanged.</param>
        /// <param name="strOld">Regular-expression pattern to search for (matched case-insensitively).</param>
        /// <param name="strNew">Replacement string; regex substitutions such as "$1" are honoured.</param>
        /// <returns>The text with every match of <paramref name="strOld"/> replaced.</returns>
        public static string RegexReplace(string strText, string strOld, string strNew)
        {
            if (string.IsNullOrEmpty(strText))
            {
                return strText;
            }

            return Regex.Replace(strText, strOld, strNew, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Removes namespaced tags — any start or end tag whose name contains a colon,
        /// such as <c>&lt;o:p&gt;</c> — while keeping the content between them.
        /// </summary>
        /// <param name="strText">Source HTML. Null or empty input is returned unchanged.</param>
        /// <returns>The HTML without namespaced tags.</returns>
        public static string ClearWordStyle(string strText)
        {
            if (string.IsNullOrEmpty(strText))
            {
                return strText;
            }

            const string startTag = @"<\w+:[^>]*>";
            const string endTag = @"</\w+:[^>]*>";

            strText = Regex.Replace(strText, startTag, string.Empty, RegexOptions.IgnoreCase);
            return Regex.Replace(strText, endTag, string.Empty, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Builds a pattern matching a complete start tag of exactly the given element.
        /// </summary>
        /// <param name="element">Literal tag name.</param>
        /// <returns>A regex pattern for the element's start tag, attributes included.</returns>
        private static string BuildStartTagPattern(string element)
        {
            if (string.IsNullOrEmpty(element))
            {
                throw new ArgumentException("Tag name must not be null or empty.", "element");
            }

            // The lookahead requires the name to end right here (whitespace, "/" or ">"), so
            // "p" no longer matches <pre> or <param>, and "b" no longer matches <br> or <body>.
            return "<" + Regex.Escape(element) + @"(?=[\s/>])[^>]*>";
        }

        #endregion
    }
}
以上只是清理的方法;实际操作时,可以这样写;
- /// <summary>
- /// 替换新闻内容中的Html标签的多余属性
- /// </summary>
- /// <param name="str"></param>
- /// <returns></returns>
- private string ArtilceClear(string str)
- {
- if (str == "" || str == null || string.IsNullOrEmpty(str))
- return "";
- //清理word标签,如o:p之类,带冒号的
- str = Extend.Article.ClearWordStyle(str);
- string[] el;
- //清理样式
- el = new string[] { "p", "div","table","tr","td" };
- foreach (string s in el)
- {
- try
- {
- str = Extend.Article.ClearElement(str, s);
- }
- catch
- {
- continue;
- }
- }
- //清除样式
- el = new string[] { "span", "strong", "font", "h1", "tbody","o:p" };
- foreach (string s in el)
- {
- try
- {
- str = Extend.Article.ReMoveElement(str, s);
- //while (str.IndexOf("</"+s+">") >-1)
- //{
- // str = Extend.Article.ReMoveElement(s, str);
- //}
- }
- catch
- {
- continue;
- }
- }
- str = Extend.Article.RegexReplace(str," ","");
- return str;
- }
/// <summary>
/// Cleans article HTML (for example, content pasted from Word) in four passes:
/// namespaced tags are dropped, structural tags lose their attributes, purely
/// presentational tags are removed (their content is kept), and <c>&amp;nbsp;</c>
/// entities are deleted.
/// </summary>
/// <param name="str">Raw article HTML; may be null or empty.</param>
/// <returns>The cleaned HTML, or an empty string when <paramref name="str"/> is null or empty.</returns>
private string ArtilceClear(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return "";
    }

    // Pass 1: drop Word's namespaced tags (the ones containing a colon, such as o:p).
    str = Extend.Article.ClearWordStyle(str);

    // Pass 2: structural elements stay in the document; only their attributes go.
    string[] tagsToStrip = new string[] { "p", "div", "table", "tr", "td" };
    foreach (string tag in tagsToStrip)
    {
        str = Extend.Article.ClearElement(str, tag);
    }

    // Pass 3: presentational wrappers are removed entirely, keeping their content.
    // The tag names are constants, so no exception needs to be swallowed per tag here.
    string[] tagsToRemove = new string[] { "span", "strong", "font", "h1", "tbody", "o:p" };
    foreach (string tag in tagsToRemove)
    {
        str = Extend.Article.ReMoveElement(str, tag);
    }

    // Pass 4: delete non-breaking-space entities.
    // NOTE(review): the published listing shows a single space as the pattern here, which
    // looks like an HTML-decoded "&nbsp;". Removing every space would fuse tag names with
    // their attributes (<a href=...> would become <ahref=...>) and run words together, so
    // the entity is matched instead. Confirm against the original source.
    str = Extend.Article.RegexReplace(str, "&nbsp;", "");
    return str;
}
注意看“清理”还是“清除”;像P、div、table等,是不能清除的,只是将它们的样式清理一下,将冗余代码去掉,该标签并不删除;而像span、font、o:p等,可以连标签清除掉;
上面的代码,只作为参考;更复杂的按条件清理,可以参看我以前的文章;一般的清理,上面的代码,也足够了
当前路径:editor/dialog/fck_paste.html
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<!--
* FCKeditor - The text editor for Internet - http://www.fckeditor.net
* Copyright (C) 2003-2007 Frederico Caldeira Knabben
*
* == BEGIN LICENSE ==
*
* Licensed under the terms of any of the following licenses at your
* choice:
*
* - GNU General Public License Version 2 or later (the "GPL")
* http://www.gnu.org/licenses/gpl.html
*
* - GNU Lesser General Public License Version 2.1 or later (the "LGPL")
* http://www.gnu.org/licenses/lgpl.html
*
* - Mozilla Public License Version 1.1 or later (the "MPL")
* http://www.mozilla.org/MPL/MPL-1.1.html
*
* == END LICENSE ==
*
* This dialog is shown when, for some reason (usually security settings),
* the user is not able to paste data from the clipboard to the editor using
* the toolbar buttons or the context menu.
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title></title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="robots" content="noindex, nofollow" />
<script type="text/javascript">
// Object returned by the parent dialog window's InnerDialogLoaded(); the functions
// below reach the editor's API through it (FCKLanguageManager, FCKTools, FCK, FCKConfig).
var oEditor = window.parent.InnerDialogLoaded() ;
// Shortcuts to members of oEditor. FCK is used by Ok() and FCKConfig by CleanWord();
// FCKTools is not referenced through this alias in the code shown here.
var FCK = oEditor.FCK;
var FCKTools = oEditor.FCKTools ;
var FCKConfig = oEditor.FCKConfig ;
// Prepares the dialog once it has loaded: translates the labels, shows the input
// control that matches the paste type handed over by the parent dialog, and
// enables the OK button.
window.onload = function ()
{
	// Translate the dialog texts before anything is shown.
	oEditor.FCKLanguageManager.TranslatePage( document ) ;

	var sMode = window.parent.dialogArguments.CustomValue ;
	var bWordMode = ( sMode == 'Word' ) ;

	if ( !bWordMode && sMode != 'Security' )
	{
		// Any other paste type uses the plain textarea.
		document.getElementById( 'txtData' ).style.display = '' ;
	}
	else
	{
		if ( sMode == 'Security' )
			document.getElementById( 'xSecurityMsg' ).style.display = '' ;

		// 'Word' and 'Security' pastes go into the iframe, which is made editable.
		var oPasteFrame = document.getElementById( 'frmData' ) ;
		oPasteFrame.style.display = '' ;

		var oFrameDoc = oPasteFrame.contentDocument ;
		if ( oFrameDoc )
			oFrameDoc.designMode = 'on' ;
		else
			oPasteFrame.contentWindow.document.body.contentEditable = true ;
	}

	// The Word clean-up options row is only shown for Word pastes.
	if ( !bWordMode )
		document.getElementById( 'oWordCommands' ).style.display = 'none' ;

	window.parent.SetOkButton( true ) ;
	window.parent.SetAutoSize( true ) ;
}
// OK-button handler of the paste dialog.
// For 'Word' and 'Security' pastes the HTML is read from the editable iframe (Word
// pastes are additionally run through FCK.CustomCleanWord when a plugin defines it,
// otherwise through CleanWord). Any other paste type takes the textarea content,
// HTML-encodes it and turns line breaks into <BR>.
// The result is inserted into the editor. Always returns true.
function Ok()
{
	var sHtml ;

	var sPastingType = window.parent.dialogArguments.CustomValue ;

	if ( sPastingType == 'Word' || sPastingType == 'Security' )
	{
		var oFrame = document.getElementById('frmData') ;
		var oBody ;

		if ( oFrame.contentDocument )
			oBody = oFrame.contentDocument.body ;
		else
			oBody = oFrame.contentWindow.document.body ;

		if ( sPastingType == 'Word' )
		{
			var bIgnoreFont = document.getElementById('chkRemoveFont').checked ;
			var bRemoveStyles = document.getElementById('chkRemoveStyles').checked ;

			// If a plugin creates a FCK.CustomCleanWord function it will be called instead of the default one
			if ( typeof( FCK.CustomCleanWord ) == 'function' )
				sHtml = FCK.CustomCleanWord( oBody, bIgnoreFont, bRemoveStyles ) ;
			else
				sHtml = CleanWord( oBody, bIgnoreFont, bRemoveStyles ) ;
		}
		else
			sHtml = oBody.innerHTML ;

		// Fix relative anchor URLs (IE automatically adds the current page URL).
		// The URL is matched as a literal string via split/join: used as a RegExp
		// pattern, characters such as "?", "." and "+" in the URL act as
		// metacharacters, so a dialog URL with a query string never matched.
		sHtml = sHtml.split( window.location + '#' ).join( '#' ) ;
	}
	else
	{
		sHtml = oEditor.FCKTools.HTMLEncode( document.getElementById('txtData').value ) ;
		// Treat CRLF as one line break so no stray "\r" is left in front of the <BR>.
		sHtml = sHtml.replace( /\r?\n/g, '<BR>' ) ;
	}

	oEditor.FCK.InsertHtml( sHtml ) ;

	return true ;
}
// Empties the paste iframe ('frmData') by clearing its document body.
function CleanUpBox()
{
	var oPasteFrame = document.getElementById( 'frmData' ) ;

	// Use contentDocument when the browser exposes it, otherwise go through contentWindow.
	var oFrameDoc = oPasteFrame.contentDocument ;
	if ( !oFrameDoc )
		oFrameDoc = oPasteFrame.contentWindow.document ;

	oFrameDoc.body.innerHTML = '' ;
}
// Cleans up HTML pasted from Word. Called by Ok() in this dialog (fck_paste.html)
// when no FCK.CustomCleanWord override is defined.
// Input:  oNode          a DOM node whose innerHTML contains the raw paste from the clipboard
//         bIgnoreFont    when true, FONT face attributes and FONT-FAMILY declarations are removed
//         bRemoveStyles  when true, style="..." attributes are removed
// Output: the cleaned string
// The replacements run in a fixed order and later rules rely on what earlier ones
// removed (e.g. emptied style attributes and bare SPAN/FONT tags), so keep the order.
function CleanWord( oNode, bIgnoreFont, bRemoveStyles )
{
	var html = oNode.innerHTML ;

	html = html.replace(/<o:p>\s*<\/o:p>/g, '') ;
	// NOTE(review): the published listing shows a plain space in the three "&nbsp;"
	// rules of this function (here, the SPAN rule and the U|I|STRIKE rule). That looks
	// like an entity decoded by the page the code was copied from, so the entity is
	// restored; confirm against the editor's own source.
	html = html.replace(/<o:p>.*?<\/o:p>/g, '&nbsp;') ;

	// Remove mso-xxx styles.
	html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, '' ) ;

	// Remove margin styles.
	html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '' ) ;
	html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;

	html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, '' ) ;
	html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;

	html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;

	html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;

	html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;

	html = html.replace( /\s*tab-stops:[^;"]*;?/gi, '' ) ;
	html = html.replace( /\s*tab-stops:[^"]*/gi, '' ) ;

	// Remove FONT face attributes.
	if ( bIgnoreFont )
	{
		html = html.replace( /\s*face="[^"]*"/gi, '' ) ;
		html = html.replace( /\s*face=[^ >]*/gi, '' ) ;

		html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, '' ) ;
	}

	// Remove Class attributes
	html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3") ;

	// Remove styles.
	if ( bRemoveStyles )
		html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

	// Remove empty styles.
	html = html.replace( /\s*style="\s*"/gi, '' ) ;

	// A SPAN holding only a non-breaking space collapses to the entity itself.
	html = html.replace( /<SPAN\s*[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, '&nbsp;' ) ;

	html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

	// Remove Lang attributes
	html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3") ;

	// Unwrap SPAN and FONT tags that are left without any attribute.
	html = html.replace( /<SPAN\s*>(.*?)<\/SPAN>/gi, '$1' ) ;

	html = html.replace( /<FONT\s*>(.*?)<\/FONT>/gi, '$1' ) ;

	// Remove XML elements and declarations
	html = html.replace(/<\\?\?xml[^>]*>/gi, '' ) ;

	// Remove Tags with XML namespace declarations: <o:p><\/o:p>
	html = html.replace(/<\/?\w+:[^>]*>/gi, '' ) ;

	// Remove comments [SF BUG-1481861].
	// Lazy and newline-aware: a greedy ".*" deleted the real content between two
	// comments on the same line, and "." never matched a comment spanning lines.
	html = html.replace(/<\!--[\s\S]*?-->/g, '' ) ;

	html = html.replace( /<(U|I|STRIKE)>&nbsp;<\/\1>/g, '&nbsp;' ) ;

	html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

	// Remove "display:none" tags.
	html = html.replace( /<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none(.*?)<\/\1>/ig, '' ) ;

	if ( FCKConfig.CleanWordKeepsStructure )
	{
		// The original <Hn> tag send from Word is something like this: <Hn style="margin-top:0px;margin-bottom:0px">
		html = html.replace( /<H(\d)([^>]*)>/gi, '<h$1>' ) ;

		// Word likes to insert extra <font> tags, when using MSIE. (Wierd).
		html = html.replace( /<(H\d)><FONT[^>]*>(.*?)<\/FONT><\/\1>/gi, '<$1>$2</$1>' );
		html = html.replace( /<(H\d)><EM>(.*?)<\/EM><\/\1>/gi, '<$1>$2</$1>' );
	}
	else
	{
		// Headings become bold DIVs with a font size that shrinks from H1 to H6.
		html = html.replace( /<H1([^>]*)>/gi, '<div$1><b><font size="6">' ) ;
		html = html.replace( /<H2([^>]*)>/gi, '<div$1><b><font size="5">' ) ;
		html = html.replace( /<H3([^>]*)>/gi, '<div$1><b><font size="4">' ) ;
		html = html.replace( /<H4([^>]*)>/gi, '<div$1><b><font size="3">' ) ;
		html = html.replace( /<H5([^>]*)>/gi, '<div$1><b><font size="2">' ) ;
		html = html.replace( /<H6([^>]*)>/gi, '<div$1><b><font size="1">' ) ;

		html = html.replace( /<\/H\d>/gi, '<\/font><\/b><\/div>' ) ;

		// Transform <P> to <DIV>
		var re = new RegExp( '(<P)([^>]*>.*?)(<\/P>)', 'gi' ) ;	// Different because of a IE 5.0 error
		html = html.replace( re, '<div$2<\/div>' ) ;

		// Remove empty tags (three times, just to be sure).
		// This also removes any empty anchor
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
	}

	return html ;
}
</script>
</head>
<body style="overflow: hidden">
<table cellspacing="0" cellpadding="0" width="100%" border="0" style="height: 98%">
<tr>
<td>
<div id="xSecurityMsg" style="display: none">
<span fcklang="DlgPasteSec">Because of your browser security settings,
the editor is not able to access your clipboard data directly. You are required
to paste it again in this window.</span><br />
</div>
<div>
<span fcklang="DlgPasteMsg2">Please paste inside the following box using the keyboard
(<strong>Ctrl+V</strong>) and hit <strong>OK</strong>.</span><br />
</div>
</td>
</tr>
<tr>
<td valign="top" height="100%" style="border-right: #000000 1px solid; border-top: #000000 1px solid;
border-left: #000000 1px solid; border-bottom: #000000 1px solid">
<textarea id="txtData" cols="80" rows="5" style="border: #000000 1px; display: none;
width: 99%; height: 98%"></textarea>
<iframe id="frmData" src="javascript:void(0)" height="98%" width="99%" frameborder="0"
style="border-right: #000000 1px; border-top: #000000 1px; display: none; border-left: #000000 1px;
border-bottom: #000000 1px; background-color: #ffffff"></iframe>
</td>
</tr>
<tr id="oWordCommands">
<td>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td nowrap="nowrap">
<input id="chkRemoveFont" type="checkbox" checked="checked" />
<label for="chkRemoveFont" fcklang="DlgPasteIgnoreFont">
Ignore Font Face definitions</label>
<br />
<input id="chkRemoveStyles" type="checkbox" />
<label for="chkRemoveStyles" fcklang="DlgPasteRemoveStyles">
Remove Styles definitions</label>
</td>
<td align="right" valign="top">
<input type="button" fcklang="DlgPasteCleanBox" value="Clean Up Box" onclick="CleanUpBox()" />
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>
当前路径:editor/dialog/fck_paste.html<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><!-- * FCKeditor - The text editor for Internet - http://www.fckeditor.net * Copyright (C) 2003-2007 Frederico Caldeira Knabben * * == BEGIN LICENSE == * * Licensed under the terms of any of the following licenses at your * choice: * * - GNU General Public License Version 2 or later (the "GPL") * http://www.gnu.org/licenses/gpl.html * * - GNU Lesser General Public License Version 2.1 or later (the "LGPL") * http://www.gnu.org/licenses/lgpl.html * * - Mozilla Public License Version 1.1 or later (the "MPL") * http://www.mozilla.org/MPL/MPL-1.1.html * * == END LICENSE == * * This dialog is shown when, for some reason (usually security settings), * the user is not able to paste data from the clipboard to the editor using * the toolbar buttons or the context menu.--><html xmlns="http://www.w3.org/1999/xhtml"><head> <title></title> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="robots" content="noindex, nofollow" /> <script type="text/javascript">var oEditor = window.parent.InnerDialogLoaded() ;var FCK = oEditor.FCK;var FCKTools = oEditor.FCKTools ;var FCKConfig = oEditor.FCKConfig ;window.onload = function (){ // First of all, translate the dialog box texts oEditor.FCKLanguageManager.TranslatePage(document) ; var sPastingType = window.parent.dialogArguments.CustomValue ; if ( sPastingType == 'Word' || sPastingType == 'Security' ) { if ( sPastingType == 'Security' ) document.getElementById( 'xSecurityMsg' ).style.display = '' ; var oFrame = document.getElementById('frmData') ; oFrame.style.display = '' ; if ( oFrame.contentDocument ) oFrame.contentDocument.designMode = 'on' ; else oFrame.contentWindow.document.body.contentEditable = true ; } else { document.getElementById('txtData').style.display = '' ; } if ( sPastingType != 'Word' ) document.getElementById('oWordCommands').style.display = 'none' ; window.parent.SetOkButton( true ) ; 
window.parent.SetAutoSize( true ) ;}function Ok(){ var sHtml ; var sPastingType = window.parent.dialogArguments.CustomValue ; if ( sPastingType == 'Word' || sPastingType == 'Security' ) { var oFrame = document.getElementById('frmData') ; var oBody ; if ( oFrame.contentDocument ) oBody = oFrame.contentDocument.body ; else oBody = oFrame.contentWindow.document.body ; if ( sPastingType == 'Word' ) { // If a plugin creates a FCK.CustomCleanWord function it will be called instead of the default one if ( typeof( FCK.CustomCleanWord ) == 'function' ) sHtml = FCK.CustomCleanWord( oBody, document.getElementById('chkRemoveFont').checked, document.getElementById('chkRemoveStyles').checked ) ; else sHtml = CleanWord( oBody, document.getElementById('chkRemoveFont').checked, document.getElementById('chkRemoveStyles').checked ) ; } else sHtml = oBody.innerHTML ; // Fix relative anchor URLs (IE automatically adds the current page URL). var re = new RegExp( window.location + "#", "g" ) ; sHtml = sHtml.replace( re, '#') ; } else { sHtml = oEditor.FCKTools.HTMLEncode( document.getElementById('txtData').value ) ; sHtml = sHtml.replace( /\n/g, '<BR>' ) ; } oEditor.FCK.InsertHtml( sHtml ) ; return true ;}function CleanUpBox(){ var oFrame = document.getElementById('frmData') ; if ( oFrame.contentDocument ) oFrame.contentDocument.body.innerHTML = '' ; else oFrame.contentWindow.document.body.innerHTML = '' ;}// This function will be called from the PasteFromWord dialog (fck_paste.html)// Input: oNode a DOM node that contains the raw paste from the clipboard// bIgnoreFont, bRemoveStyles booleans according to the values set in the dialog// Output: the cleaned stringfunction CleanWord( oNode, bIgnoreFont, bRemoveStyles ){ var html = oNode.innerHTML ; html = html.replace(/<o:p>\s*<\/o:p>/g, '') ; html = html.replace(/<o:p>.*?<\/o:p>/g, ' ') ; // Remove mso-xxx styles. html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, '' ) ; // Remove margin styles. 
html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '' ) ; html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ; html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, '' ) ; html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ; html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ; html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ; html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ; html = html.replace( /\s*tab-stops:[^;"]*;?/gi, '' ) ; html = html.replace( /\s*tab-stops:[^"]*/gi, '' ) ; // Remove FONT face attributes. if ( bIgnoreFont ) { html = html.replace( /\s*face="[^"]*"/gi, '' ) ; html = html.replace( /\s*face=[^ >]*/gi, '' ) ; html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, '' ) ; } // Remove Class attributes html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3") ; // Remove styles. if ( bRemoveStyles ) html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ; // Remove empty styles. html = html.replace( /\s*style="\s*"/gi, '' ) ; html = html.replace( /<SPAN\s*[^>]*>\s* \s*<\/SPAN>/gi, ' ' ) ; html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ; // Remove Lang attributes html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3") ; html = html.replace( /<SPAN\s*>(.*?)<\/SPAN>/gi, '$1' ) ; html = html.replace( /<FONT\s*>(.*?)<\/FONT>/gi, '$1' ) ; // Remove XML elements and declarations html = html.replace(/<\\?\?xml[^>]*>/gi, '' ) ; // Remove Tags with XML namespace declarations: <o:p><\/o:p> html = html.replace(/<\/?\w+:[^>]*>/gi, '' ) ; // Remove comments [SF BUG-1481861]. html = html.replace(/<\!--.*-->/g, '' ) ; html = html.replace( /<(U|I|STRIKE)> <\/\1>/g, ' ' ) ; html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ; // Remove "display:none" tags. 
html = html.replace( /<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none(.*?)<\/\1>/ig, '' ) ; if ( FCKConfig.CleanWordKeepsStructure ) { // The original <Hn> tag send from Word is something like this: <Hn style="margin-top:0px;margin-bottom:0px"> html = html.replace( /<H(\d)([^>]*)>/gi, '<h$1>' ) ; // Word likes to insert extra <font> tags, when using MSIE. (Wierd). html = html.replace( /<(H\d)><FONT[^>]*>(.*?)<\/FONT><\/\1>/gi, '<$1>$2</$1>' ); html = html.replace( /<(H\d)><EM>(.*?)<\/EM><\/\1>/gi, '<$1>$2</$1>' ); } else { html = html.replace( /<H1([^>]*)>/gi, '<div$1><b><font size="6">' ) ; html = html.replace( /<H2([^>]*)>/gi, '<div$1><b><font size="5">' ) ; html = html.replace( /<H3([^>]*)>/gi, '<div$1><b><font size="4">' ) ; html = html.replace( /<H4([^>]*)>/gi, '<div$1><b><font size="3">' ) ; html = html.replace( /<H5([^>]*)>/gi, '<div$1><b><font size="2">' ) ; html = html.replace( /<H6([^>]*)>/gi, '<div$1><b><font size="1">' ) ; html = html.replace( /<\/H\d>/gi, '<\/font><\/b><\/div>' ) ; // Transform <P> to <DIV> var re = new RegExp( '(<P)([^>]*>.*?)(<\/P>)', 'gi' ) ; // Different because of a IE 5.0 error html = html.replace( re, '<div$2<\/div>' ) ; // Remove empty tags (three times, just to be sure). // This also removes any empty anchor html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ; html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ; html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ; } return html ;} </script></head><body style="overflow: hidden"> <table cellspacing="0" cellpadding="0" width="100%" border="0" style="height: 98%"> <tr> <td> <div id="xSecurityMsg" style="display: none"> <span fcklang="DlgPasteSec">Because of your browser security settings, the editor is not able to access your clipboard data directly. 
You are required to paste it again in this window.</span><br /> </div> <div> <span fcklang="DlgPasteMsg2">Please paste inside the following box using the keyboard (<strong>Ctrl+V</strong>) and hit <strong>OK</strong>.</span><br /> </div> </td> </tr> <tr> <td valign="top" height="100%" style="border-right: #000000 1px solid; border-top: #000000 1px solid; border-left: #000000 1px solid; border-bottom: #000000 1px solid"> <textarea id="txtData" cols="80" rows="5" style="border: #000000 1px; display: none; width: 99%; height: 98%"></textarea> <iframe id="frmData" src="javascript:void(0)" height="98%" width="99%" frameborder="0" style="border-right: #000000 1px; border-top: #000000 1px; display: none; border-left: #000000 1px; border-bottom: #000000 1px; background-color: #ffffff"></iframe> </td> </tr> <tr id="oWordCommands"> <td> <table border="0" cellpadding="0" cellspacing="0" width="100%"> <tr> <td nowrap="nowrap"> <input id="chkRemoveFont" type="checkbox" checked="checked" /> <label for="chkRemoveFont" fcklang="DlgPasteIgnoreFont"> Ignore Font Face definitions</label> <br /> <input id="chkRemoveStyles" type="checkbox" /> <label for="chkRemoveStyles" fcklang="DlgPasteRemoveStyles"> Remove Styles definitions</label> </td> <td align="right" valign="top"> <input type="button" fcklang="DlgPasteCleanBox" value="Clean Up Box" οnclick="CleanUpBox()" /> </td> </tr> </table> </td> </tr> </table></body></html>