/// <summary>
/// Checks the two regular expressions applied to a sample HTML fragment:
/// one isolates the whole &lt;table&gt; element, the other extracts the inner
/// text of every &lt;td&gt; cell, empty cells included.
/// </summary>
[Fact]
public void Regex_Test()
{
    // Arrange
    // The sample is a verbatim string, so every \"" sequence below yields a literal
    // backslash followed by a double quote at runtime (e.g. class=\"container\").
    // NOTE(review): the header row is terminated by <tr> instead of </tr>. It is kept
    // as-is because neither pattern looks at row tags; confirm this is intentional.
    var rowHtml = @"<div class=\""container\"">
<div class=\""title dottedline\"">XXXXXX股份有限公司-受益所有人查询结果</div>
</div>
<div class=\""container\"">
<table class=\""gridtable\"">
<tr>
<th>序号</th>
<th>名称</th>
<th>注册币种</th>
<th>成立日期</th>
<th>注册资本</th>
<th>统一社会信用代码</th>
<th>持股占比</th>
<th>投资类型描述</th>
<th>是否上市</th>
<th>是否是受益人</th>
<th>受益人类型</th>
<th>职务</th>
<tr>
<tr>
<td>1</td>
<td>XXXXXX股份有限公司</td>
<td>人民币元</td>
<td>1985-11-22</td>
<td>35640625.708900</td>
<td>91100000100003962T</td>
<td></td>
<td></td>
<td>是</td>
<td>否</td>
<td></td>
<td></td>
</tr>
<tr>
<td>2</td>
<td>铎梦之</td>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
<td>是</td>
<td>关键管理人员</td>
<td>董事</td>
</tr>
</table>
</div>";

    // Two data rows of twelve cells each; <th> header cells are not matched.
    var expectedCells = new[]
    {
        "1", "XXXXXX股份有限公司", "人民币元", "1985-11-22", "35640625.708900", "91100000100003962T",
        "", "", "是", "否", "", "",
        "2", "铎梦之", "", "", "", "", "", "", "", "是", "关键管理人员", "董事",
    };

    // Act
    // [\s\S] is used for the table body because '.' does not match a line feed
    // without RegexOptions.Singleline.
    var tableRegex = new Regex(@"<table.*?>[\s\S]*?<\/table>");
    MatchCollection tableMatches = tableRegex.Matches(rowHtml);

    // Look-behind / look-ahead keep the <td> and </td> tags out of the match value,
    // so each match is exactly the cell's inner text (possibly empty).
    var cellRegex = new Regex(@"(?<=<td>)(.*?)(?=</td>)", RegexOptions.IgnoreCase);
    MatchCollection cellMatches = cellRegex.Matches(rowHtml);

    var cells = new string[cellMatches.Count];
    for (var i = 0; i < cellMatches.Count; i++)
    {
        cells[i] = cellMatches[i].Value;
    }

    // string.Join places separators only between items, so empty trailing cells are
    // preserved (appending "," and then TrimEnd(',') would strip them).
    var tdText = string.Join(",", cells);

    // Assert
    Assert.Equal(1, tableMatches.Count);
    var tableHtml = tableMatches[0].Value;
    Assert.StartsWith(@"<table class=\""gridtable\"">", tableHtml);
    Assert.EndsWith("</table>", tableHtml);

    Assert.Equal(expectedCells, cells);
    Assert.Equal(
        "1,XXXXXX股份有限公司,人民币元,1985-11-22,35640625.708900,91100000100003962T,,,是,否,,,2,铎梦之,,,,,,,,是,关键管理人员,董事",
        tdText);
}