/// <summary>
/// Sample input that <see cref="Page_Load"/> filters in place.
/// NOTE(review): this looks like the text left over from an HTML fragment whose
/// markup was lost when the snippet was copied; substitute real HTML to see
/// the filter remove anything.
/// </summary>
protected string str = @"
sdasasdsdd |
sds
aaassss 说是道 ";

/// <summary>
/// Strips HTML tags from <see cref="str"/>, keeping only <c>img</c>,
/// <c>br</c> and <c>p</c> tags, and stores the result back in the field.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns for other filtering policies.
    // NOTE(review): the angle-bracket parts of every pattern were missing from
    // the copy this was restored from; the forms below are reconstructed from
    // the accompanying descriptions and should be confirmed before use.
    //   @"<[^>]*>"                          remove every tag
    //   @"<script[^>]*>[\s\S]*?</script>"   remove scripts together with their content
    //   @"<img[^>]*>"                       remove images
    //   @"<(?!/?br\b)[^>]*>"                remove every tag except br
    //   @"<table[^>]*>[\s\S]*?</table>"     remove tables together with their content

    // Remove every tag except img, br and p (opening, closing or self-closing).
    // The \b keeps the exclusion from also sparing tags that merely start with
    // those letters, such as <pre> or <param>.
    string regexstr = @"<(?!/?(?:img|br|p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}
asp中正则表达式去除HTML标记(窃自eWebEditor)
2009年12月31日 星期四 下午 12:40
' Removes every match of a regular expression from a string.
'   re      - VBScript RegExp pattern to search for.
'   content - text to search.
' Returns content with all matches (Global = True) replaced by "".
' IgnoreCase is not set here, so callers that need case-insensitive matching
' must normalise the case of content themselves.
Function ExecReg(re, content)
    Dim myRegExp

    Set myRegExp = New RegExp
    myRegExp.Global = True
    myRegExp.Pattern = re

    ExecReg = myRegExp.Replace(content, "")
End Function
' Strips script, layout/formatting tags, class and style attributes, XML and
' namespace tags, fonts, marquees and embedded objects from an HTML fragment.
'   html - HTML text to filter. Declared ByVal so that the assignments below do
'          not overwrite the caller's variable (VBScript arguments are ByRef
'          unless stated otherwise).
' Returns the filtered text. The whole input is lower-cased first, so visible
' text loses its capitalisation too.
' NOTE(review): the "<" / "</" prefixes of the patterns were missing from the
' copy this function was restored from and have been reconstructed; confirm
' them against the original eWebEditor source.
' NOTE(review): this is a pattern blacklist; do not rely on it alone to make
' untrusted HTML safe to render.
Function DecodeFilter(ByVal html)
    Dim tagName, attrRegExp

    html = LCase(html)

    ' Remove client-side script: <script> tags (their text content is kept),
    ' script URL schemes and the start of common event-handler attribute names.
    html = ExecReg("</?script[^>]*>", html)
    html = ExecReg("(javascript|jscript|vbscript|vbs):", html)
    html = ExecReg("on(mouse|exit|error|click|key)", html)
    ' NOTE(review): a fourth call followed here whose pattern did not survive
    ' (it was empty, which makes the call a no-op). Restore it from the
    ' original source if the complete script filter is required.

    ' Remove table, layout and basic formatting tags, keeping their content.
    ' Tag names match by prefix, so e.g. "b" also strips <br> and <body>, and
    ' "p" also strips <pre>.
    For Each tagName In Array("table", "tr", "th", "td", "a", "p", "img", _
                              "div", "ul", "li", "tbody", "h1", "h2", "h3", _
                              "h4", "h5", "h6", "b", "strong")
        html = ExecReg("</?" & tagName & "[^>]*>", html)
    Next

    ' Remove class=... and style="..." attributes. ExecReg always substitutes
    ' "", which would delete the whole tag, so the captured text on either
    ' side of the attribute is put back with $1$2 instead.
    Set attrRegExp = New RegExp
    attrRegExp.Global = True
    attrRegExp.Pattern = "(<[^>]+) class=[^ |^>]*([^>]*>)"
    html = attrRegExp.Replace(html, "$1$2")
    attrRegExp.Pattern = "(<[^>]+) style=""[^""]*""([^>]*>)"
    html = attrRegExp.Replace(html, "$1$2")
    Set attrRegExp = Nothing

    ' Remove XML: accepts <?xml ...> declarations as well as <xml>/</xml>
    ' tags, because the lost prefix could have targeted either.
    html = ExecReg("</?\??xml[^>]*>", html)

    ' Remove namespaced tags such as <o:p> and </o:p>.
    html = ExecReg("</?[a-z]+:[^>]*>", html)

    ' Remove font tags.
    html = ExecReg("</?font[^>]*>", html)

    ' Remove marquee tags.
    html = ExecReg("</?marquee[^>]*>", html)

    ' Remove embedded objects.
    html = ExecReg("</?object[^>]*>", html)
    html = ExecReg("</?param[^>]*>", html)
    html = ExecReg("</?embed[^>]*>", html)

    DecodeFilter = html
End Function

' Removes every <...> tag from strText and returns the remaining text.
' NOTE(review): the pattern's leading "<[^>" was missing from the copy this was
' restored from and has been reconstructed.
Function RemoveHTML(strText)
    Dim RegEx

    Set RegEx = New RegExp
    RegEx.Pattern = "<[^>]*>"
    RegEx.Global = True

    RemoveHTML = RegEx.Replace(strText, "")
End Function

' Replaces HTML tags in str with a space and then removes every space.
'   str - text to clean. Declared ByVal so that the caller's variable is not
'         overwritten (VBScript arguments are ByRef unless stated otherwise).
' Returns the cleaned text.
' NOTE(review): both patterns were damaged in the copy this was restored from
' (the second was an unterminated character class); they are reconstructed and
' should be confirmed.
Function nohtml(ByVal str)
    Dim re

    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True

    ' Opening tags: "<", one character, then anything up to a closing ">".
    re.Pattern = "(\<.[^\<]*\>)"
    str = re.Replace(str, " ")

    ' Closing tags: "</" ... ">".
    re.Pattern = "(\<\/[^\<]*\>)"
    str = re.Replace(str, " ")

    ' The source applied this replacement twice in a row; the second pass could
    ' not change anything, so it is applied once.
    ' NOTE(review): one of the two may originally have targeted an entity such
    ' as a non-breaking space rather than a literal space - confirm.
    str = Replace(str, " ", "")

    nohtml = str
    Set re = Nothing
End Function
---|