1、ASP版本
Function RemoveHTML(strHTML)
ON ERROR RESUME NEXT
Dim objRegExp, strOutput
strHTML = replace(strHTML,"; ",";")
strHTML = replace(strHTML,chr(13),";")
strHTML = replace(strHTML,chr(10),";")
strHTML = replace(strHTML,chr(32),";")
Set objRegExp = New Regexp
objRegExp.IgnoreCase = True ’忽略大小写
objRegExp.Global = True ’设置为全文搜索
objRegExp.Pattern = ""
strOutput = objRegExp.Replace(strHTML, "")
objRegExp.Pattern = "<script[^>]*?>.*?</script>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern = "
[/s/S]+?
"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern = "
]*>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern = "<(/w[^>]*) class=([^ |>]*)([^>]*)"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<(/w[^>]*) style=’([^’]*)’([^>]*)"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<(/w[^>]*) lang=([^ |>]*)([^>]*)"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<
//?/?xml[^>]*>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="
]*>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="<(.[^>]*)>"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="([/r/n])[/s]+"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="-->"
strOutput = objRegExp.Replace(strOutput, "")
objRegExp.Pattern ="
", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"