重点是先抓取<td>里的内容,其它的就好说了:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
namespace ReadFile
{
/// <summary>
/// Console utility that reads a text file containing HTML markup and prints
/// every <c>&lt;td class="c-default" &gt;</c> cell found in it.
/// </summary>
class Program
{
    /// <summary>Hard-coded location of the input file to scan.</summary>
    private const string FilePath = @"E:\vs\bd.txt";

    /// <summary>
    /// Matches one <c>&lt;td class="c-default" &gt;…&lt;/td&gt;</c> cell. The negative
    /// lookahead stops the lazy body from running across a bare
    /// <c>&lt;td&gt;</c> or <c>&lt;/td&gt;</c> tag.
    /// </summary>
    private const string CellPattern = "<td class=\"c-default\" >(?:(?!</?td>)[\\s\\S])*?</td>";

    /// <summary>
    /// Reads the whole input file as UTF-8, strips CR/LF/TAB characters, then
    /// writes each matching table cell to the console under a separator line.
    /// Finally waits for a key press before returning.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string fileContent;

        // Dispose the reader deterministically so the file handle is released
        // as soon as the content has been read.
        using (StreamReader reader = new StreamReader(FilePath, Encoding.GetEncoding("UTF-8")))
        {
            fileContent = reader.ReadToEnd();
        }

        // Remove line breaks and tabs so each cell is matched as one contiguous run of text.
        fileContent = Regex.Replace(fileContent, "[\r\n\t]+", "");

        // RegexOptions.Multiline is omitted: the pattern has no ^ or $ anchors,
        // so the option has no effect on the matches.
        foreach (Match cell in Regex.Matches(fileContent, CellPattern))
        {
            Console.WriteLine("================================================");
            Console.WriteLine(cell.Value);
        }

        // Block until input is available on stdin before returning.
        Console.Read();
    }
}
}