' ExecReg: deletes every match of a regular expression from a string.
'   re      - pattern text assigned to the RegExp object's Pattern property
'   content - the string to scan
' Returns content with all matches replaced by the empty string. Matching is
' global (every occurrence) and uses the RegExp default for case handling,
' since IgnoreCase is not set here.
Function ExecReg(re, content)
    Dim matcher
    Set matcher = New RegExp
    With matcher
        .Pattern = re
        .Global = True
    End With
    ' Replacing with "" removes each matched span outright.
    ExecReg = matcher.Replace(content, "")
    Set matcher = Nothing
End Function
' DecodeFilter: strips client-side scripting hooks and a fixed list of markup
' tags from an HTML fragment.
'   html - the HTML text to filter (not modified)
' Returns the filtered text. The whole input is lower-cased first, so the
' result is always lower case. Only the tags themselves are removed; text
' between an opening and closing tag is kept.
'
' Notes:
'   * Tag patterns have the form "</?name[^>]*>" with no word boundary, so a
'     name also matches longer tag names that start with it (for example the
'     "b" pattern also removes <br> and <body>).
'   * ExecReg always replaces with the empty string, so the class= / style=
'     patterns remove the entire tag that carries the attribute, not just the
'     attribute.
Function DecodeFilter(html)
    Dim cleaned, tagNames, tagName

    ' Work on a local copy: VBScript passes arguments ByRef by default, so
    ' assigning to html would overwrite the caller's variable.
    cleaned = LCase(html)

    ' Remove client-side scripting: <script> tags, script-protocol prefixes
    ' (javascript:, jscript:, vbscript:, vbs:) and common event-handler
    ' attribute name prefixes (onmouse..., onclick, onkey..., ...).
    cleaned = ExecReg("</?script[^>]*>", cleaned)
    cleaned = ExecReg("(javascript|jscript|vbscript|vbs):", cleaned)
    cleaned = ExecReg("on(mouse|exit|error|click|key)", cleaned)
    ' NOTE(review): the original had one more call here with an empty pattern,
    ' which removes nothing. The real pattern was presumably lost when this
    ' snippet was extracted from a web page - confirm against the original
    ' source and restore it if needed.

    ' Remove table, layout, heading and emphasis tags (order preserved from
    ' the original list).
    tagNames = Array("table", "tr", "th", "td", "a", "p", "img", "div", _
                     "ul", "li", "tbody", "h1", "h2", "h3", "h4", "h5", _
                     "h6", "b", "strong")
    For Each tagName In tagNames
        cleaned = ExecReg("</?" & tagName & "[^>]*>", cleaned)
    Next

    ' Remove tags carrying a class attribute: class=...
    cleaned = ExecReg("(<[^>]+) class=[^ |^>]*([^>]*>)", cleaned)
    ' Remove tags carrying a double-quoted style attribute: style="..."
    cleaned = ExecReg("(<[^>]+) style=""[^""]*""([^>]*>)", cleaned)
    ' Remove XML declarations: <?xml ...>
    cleaned = ExecReg("<\?xml[^>]*>", cleaned)
    ' Remove namespaced tags such as <o:p> ... </o:p>
    cleaned = ExecReg("</?[a-z]+:[^>]*>", cleaned)
    ' Remove font tags
    cleaned = ExecReg("</?font[^>]*>", cleaned)
    ' Remove marquee tags
    cleaned = ExecReg("</?marquee[^>]*>", cleaned)
    ' Remove embedded-object tags
    cleaned = ExecReg("</?object[^>]*>", cleaned)
    cleaned = ExecReg("</?param[^>]*>", cleaned)
    cleaned = ExecReg("</?embed[^>]*>", cleaned)

    DecodeFilter = cleaned
End Function
' Usage: DecodeFilter(content)
---|