匹配单层 HTML 标签的小 demo,应该能匹配大多数 HTML 字符串,但多层(嵌套)的 HTML 标签解析不出来。代码里可能还有小 bug,这里仅作抛砖引玉。
1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.Text.RegularExpressions;
6 using System.Threading.Tasks;
7
namespace ResolveHtmlText
{
    /// <summary>
    /// Console demo that extracts the inner text of single-level (non-nested)
    /// HTML elements from a string and prints the concatenated result.
    /// Elements whose content contains another tag are not matched.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Matches one non-nested element: an opening tag (optionally carrying
        /// single-quoted attributes whose values look like CSS declarations),
        /// text free of '&lt;' and '&gt;', and a closing tag. The inner text is
        /// exposed through the named group "text".
        /// </summary>
        /// <remarks>
        /// The closing tag name is not required to equal the opening tag name.
        /// The negated character classes are written as [^&lt;&gt;] and [^:;&lt;&gt;];
        /// a '^' that is not the first character of a class is a literal caret,
        /// so the earlier spelling [^&lt;^&gt;] also rejected text containing '^'.
        /// </remarks>
        private static readonly Regex ElementRegex = new Regex(
            @"(?:"
            // Opening tag, form 1: a single attribute holding one "name:c"
            // declaration whose value is exactly one character.
            + @"<\s*[a-zA-Z0-9]+\s*[a-zA-Z]+\s*=\s*'\s*[a-zA-Z]+\s*:\s*[^<>];?'\s*>"
            + @"|"
            // Opening tag, form 2: zero or more attributes, each holding one or
            // more "name:value" declarations separated by ';'.
            + @"<\s*[a-zA-Z0-9]+\s*(\s*[a-zA-Z-]+\s*=\s*'(\s*[a-zA-Z-]+\s*:"
            + @"\s*[^:;<>]+\s*;\s*)*(\s*[a-zA-Z-]+\s*:\s*"
            + @"[^:;<>]+\s*)\s*;?\s*'\s*)*\s*>"
            + @")"
            // Inner text; may be empty and may span several lines.
            + @"(?<text>[^<>]*)"
            // Closing tag.
            + @"<\s*/\s*[a-zA-Z0-9]+\s*>");

        /// <summary>
        /// Entry point: runs the extraction over a built-in sample string, prints
        /// every matched element and the concatenated inner text, then waits for
        /// Enter so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string text = @" <span style='color:#1F497D'><span>y<span></span> <span style='color:#1F497D;'>1</span> <span style='color:#1F497D;background-color:#123456'>2</span><span style='color:#1F497D;background-color:#123456;text-align:center'>3</span> <span style='color:#1F497D;background-color:#123456;text-align:center;'>4</span> <span style='color:#1F497D;background-color:#123456;text-align:center;tt-l: 134;'>5</span>ggjf<a>123456</a>";
            Console.WriteLine("原字符串:" + text);

            text = NormalizeMarkup(text);

            StringBuilder sb = new StringBuilder();
            foreach (Match m in ElementRegex.Matches(text))
            {
                Console.WriteLine("临时值:" + m.Value);

                // The named group already holds exactly the text between the
                // tags, so no second regex pass over the match is needed and
                // multi-line text is kept. Appending an empty capture is a no-op.
                sb.Append(m.Groups["text"].Value);
            }

            Console.WriteLine("解析出的结果:" + sb.ToString());
            Console.ReadLine();
        }

        /// <summary>
        /// Prepares raw markup for <see cref="ElementRegex"/>: double quotes and
        /// the &amp;quot; entity become single quotes (the pattern only recognises
        /// single-quoted attribute values), &amp;nbsp; is removed, and &amp;lt; /
        /// &amp;gt; are turned back into angle brackets.
        /// </summary>
        /// <param name="html">Markup to normalise; null is treated as empty.</param>
        /// <returns>The normalised markup, never null.</returns>
        private static string NormalizeMarkup(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            // NOTE(review): the published listing showed these entity literals
            // already decoded (the &quot; line did not compile and the &lt;/&gt;
            // lines were no-ops). &quot;, &lt; and &gt; follow from the original
            // comments and replacement values; &nbsp; is inferred from its
            // position in the sequence - confirm against the author's source.
            return html
                .Replace("\"", "'")
                .Replace("&quot;", "'")
                .Replace("&nbsp;", "")
                .Replace("&lt;", "<")   // turn the escape code for '<' back into '<'
                .Replace("&gt;", ">");  // turn the escape code for '>' back into '>'
        }
    }
}