在抓取html页时,需要过滤掉html代码,获取Html源代码中的Text,有正则表达式可以解决这个问题:

VB.NET

''' -----------------------------------------------------------------------------
''' <summary>
''' Removes every HTML tag from the given markup and returns the remaining text.
''' </summary>
''' <param name="HTML">HTML source code to strip.</param>
''' <returns>
''' The input with every "&lt;...&gt;" run removed, or an empty string when the
''' input is Nothing or empty.
''' </returns>
''' <remarks>
''' Only the tags themselves are removed: text between tags (for example the
''' content of the title element) is kept, and character entities are not decoded.
''' </remarks>
''' <history>
'''     [Administrator]    2004-9-25    Created
''' </history>
''' -----------------------------------------------------------------------------
Public Function ParseTags(ByVal HTML As String) As String
    ' Regex.Replace throws ArgumentNullException for a Nothing input, so
    ' short-circuit before calling it.
    If HTML Is Nothing OrElse HTML.Length = 0 Then
        Return String.Empty
    End If

    ' Replace is a Shared member; call it on the type rather than through an
    ' uninitialized Regex variable.
    Return System.Text.RegularExpressions.Regex.Replace(HTML, "<[^>]*>", "")
End Function

C#

/// <summary>
/// Removes every HTML tag from the given markup and returns the remaining text.
/// </summary>
/// <param name="HTML">HTML source code to strip.</param>
/// <returns>
/// The input with every "&lt;...&gt;" run removed, or an empty string when the
/// input is null or empty.
/// </returns>
/// <remarks>
/// Only the tags themselves are removed: text between tags (for example the
/// content of the title element) is kept, and character entities are not decoded.
/// </remarks>
public string ParseTags(string HTML)
{
    // Regex.Replace throws ArgumentNullException for a null input, so
    // short-circuit before calling it.
    if (HTML == null || HTML.Length == 0)
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(HTML, "<[^>]*>", "");
}

提供一简单示例如下:

VB.NET

' Builds a small sample HTML page in memory and writes its tag-free text to the response.
Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
    Dim oStringBuilder As New System.Text.StringBuilder

    oStringBuilder.Append(ControlChars.CrLf & "<!DOCTYPE HTML PUBLIC ""-//W3C//DTD HTML 4.0 Transitional//EN"">")
    oStringBuilder.Append(ControlChars.CrLf & "<HTML>")
    oStringBuilder.Append(ControlChars.CrLf & " <HEAD>")
    oStringBuilder.Append(ControlChars.CrLf & " <title>WebForm1</title>")
    oStringBuilder.Append(ControlChars.CrLf & " <meta name=""GENERATOR"" content=""Microsoft Visual Studio .NET 7.1"">")
    oStringBuilder.Append(ControlChars.CrLf & " <meta name=""CODE_LANGUAGE"" content=""Visual Basic .NET 7.1"">")
    oStringBuilder.Append(ControlChars.CrLf & " <meta name=""vs_defaultClientScript"" content=""JavaScript"">")
    oStringBuilder.Append(ControlChars.CrLf & " <meta name=""vs_targetSchema"" content=""http://schemas.microsoft.com/intellisense/ie5"">")
    oStringBuilder.Append(ControlChars.CrLf & " </HEAD>")
    oStringBuilder.Append(ControlChars.CrLf & " <body MS_POSITIONING=""GridLayout"">")
    oStringBuilder.Append(ControlChars.CrLf & " <form id=""Form1"" method=""post"" runat=""server"">")
    oStringBuilder.Append(ControlChars.CrLf & " <FONT face=""宋体"">测试</FONT>")
    oStringBuilder.Append(ControlChars.CrLf & " </form>")
    oStringBuilder.Append(ControlChars.CrLf & " </body>")
    oStringBuilder.Append(ControlChars.CrLf & "</HTML>")

    ' Only the text between the tags is written to the response.
    Response.Write(ParseTags(oStringBuilder.ToString()))
End Sub

C#

// Builds a small sample HTML page in memory and writes its tag-free text to the response.
private void Page_Load(object sender, System.EventArgs e)
{
    // Same value as Microsoft.VisualBasic.ControlChars.CrLf, without needing a
    // reference to the Microsoft.VisualBasic assembly.
    const string CrLf = "\r\n";

    System.Text.StringBuilder oStringBuilder = new System.Text.StringBuilder();

    // Double quotes inside a regular C# string literal must be escaped as \".
    oStringBuilder.Append(CrLf + "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
    oStringBuilder.Append(CrLf + "<HTML>");
    oStringBuilder.Append(CrLf + " <HEAD>");
    oStringBuilder.Append(CrLf + " <title>WebForm1</title>");
    oStringBuilder.Append(CrLf + " <meta name=\"GENERATOR\" content=\"Microsoft Visual Studio .NET 7.1\">");
    oStringBuilder.Append(CrLf + " <meta name=\"CODE_LANGUAGE\" content=\"Visual Basic .NET 7.1\">");
    oStringBuilder.Append(CrLf + " <meta name=\"vs_defaultClientScript\" content=\"JavaScript\">");
    oStringBuilder.Append(CrLf + " <meta name=\"vs_targetSchema\" content=\"http://schemas.microsoft.com/intellisense/ie5\">");
    oStringBuilder.Append(CrLf + " </HEAD>");
    oStringBuilder.Append(CrLf + " <body MS_POSITIONING=\"GridLayout\">");
    oStringBuilder.Append(CrLf + " <form id=\"Form1\" method=\"post\" runat=\"server\">");
    oStringBuilder.Append(CrLf + " <FONT face=\"宋体\">测试</FONT>");
    oStringBuilder.Append(CrLf + " </form>");
    oStringBuilder.Append(CrLf + " </body>");
    oStringBuilder.Append(CrLf + "</HTML>");

    // Only the text between the tags is written to the response.
    Response.Write(ParseTags(oStringBuilder.ToString()));
}

输出结果为:

WebForm1 测试