Read Html string to DataTable

/// <summary>
    /// Reads the rows of an HTML table fragment into a <see cref="DataTable"/>.
    /// The first visible row supplies the column names; every later visible row
    /// becomes a data row. A cell's value is the text that follows the last tag
    /// inside the cell.
    /// </summary>
    /// <remarks>
    /// Rows whose opening &lt;tr&gt; tag contains "display:none" are skipped.
    /// &lt;font&gt; tags inside a cell are stripped with RemoveString before the
    /// text is read; other inline markup is not interpreted and HTML entities are
    /// not decoded. Tag matching is case-sensitive. Duplicate header names get a
    /// numeric suffix (Name, Name1, Name2, ...).
    /// </remarks>
    /// <param name="tempHtml">HTML text containing &lt;tr&gt;...&lt;/tr&gt; rows.</param>
    /// <returns>
    /// The parsed table. An empty table is returned when the input is null or
    /// empty, or contains no complete &lt;tr&gt;...&lt;/tr&gt; span.
    /// </returns>
    protected DataTable GetDataTableFromString(string tempHtml)
    {
        DataTable dt = new DataTable();
        if (string.IsNullOrEmpty(tempHtml))
        {
            return dt;
        }

        // Keep only the text from the first "<tr" through the last "</tr>".
        int firstRow = tempHtml.IndexOf("<tr", StringComparison.Ordinal);
        if (firstRow < 0)
        {
            return dt;
        }
        int lastRowEnd = tempHtml.LastIndexOf("</tr>", StringComparison.Ordinal);
        if (lastRowEnd < firstRow)
        {
            return dt;
        }
        string rowsHtml = tempHtml.Substring(firstRow, lastRowEnd + "</tr>".Length - firstRow);

        // Split on the closing tags themselves instead of substituting a one-character
        // separator: cell text may then contain any character (including '^') safely.
        string[] rowSeparators = new string[] { "</tr>" };
        string[] cellSeparators = new string[] { "</td>", "</th>" };

        bool headerRead = false;
        string[] rowFragments = rowsHtml.Split(rowSeparators, StringSplitOptions.None);

        // The final fragment is whatever follows the last "</tr>", never a row.
        for (int i = 0; i < rowFragments.Length - 1; i++)
        {
            string rowHtml = rowFragments[i];
            int rowStart = rowHtml.IndexOf("<tr", StringComparison.Ordinal);
            if (rowStart < 0)
            {
                continue;
            }
            rowHtml = rowHtml.Substring(rowStart);

            // Skip the row only when "display:none" sits inside the opening <tr ...> tag;
            // the same text appearing later (in a cell) does not hide the row.
            int hiddenAt = rowHtml.IndexOf("display:none", StringComparison.Ordinal);
            int openTagEnd = rowHtml.IndexOf('>');
            if (hiddenAt > -1 && hiddenAt <= openTagEnd)
            {
                continue;
            }

            // The fragment after the last cell terminator is not a cell, so it is dropped.
            string[] cellFragments = rowHtml.Split(cellSeparators, StringSplitOptions.None);
            string[] values = new string[cellFragments.Length - 1];
            for (int j = 0; j < values.Length; j++)
            {
                string cellHtml = RemoveString(cellFragments[j], "<font>");
                int textStart = cellHtml.LastIndexOf('>') + 1;

                // A fragment without any '>' is malformed; it is left as null (no value).
                values[j] = textStart > 0 ? cellHtml.Substring(textStart) : null;
            }

            if (!headerRead)
            {
                // The first visible row defines the columns.
                headerRead = true;
                foreach (string headerText in values)
                {
                    if (headerText == null)
                    {
                        // Keep positional alignment with the data rows.
                        dt.Columns.Add();
                        continue;
                    }

                    string columnName = headerText;
                    int suffix = 1;
                    while (dt.Columns.Contains(columnName))
                    {
                        columnName = headerText + suffix.ToString();
                        suffix += 1;
                    }
                    dt.Columns.Add(columnName);
                }
                continue;
            }

            // A data row wider than the header gets extra default-named columns
            // rather than failing on an out-of-range column index.
            while (dt.Columns.Count < values.Length)
            {
                dt.Columns.Add();
            }

            DataRow newRow = dt.NewRow();
            for (int j = 0; j < values.Length; j++)
            {
                if (values[j] != null)
                {
                    newRow[j] = values[j];
                }
            }
            dt.Rows.Add(newRow);
        }

        dt.AcceptChanges();
        return dt;
    }

    /// <summary>
    /// 删除指定的字符串
    /// </summary>
    /// <param name="tmpHtml"></param>
    /// <param name="remove"></param>
    /// <returns></returns>
    protected string RemoveString(string tmpHtml, string remove)
    {
        tmpHtml = tmpHtml.Replace(remove.Replace("<", "</"), "");
        tmpHtml = RemoveStringHead(tmpHtml, remove);
        return tmpHtml;
    }
    /// <summary>
    /// 删除HTML的标记
    /// </summary>
    /// <param name="tmpHtml"></param>
    /// <param name="remove"></param>
    /// <returns></returns>
    protected string RemoveStringHead(string tmpHtml, string remove)
    {
        if (remove.Length < 1)
        {
            return tmpHtml;
        }
        if ((remove.Substring(0, 1) != "<") || (remove.Substring(remove.Length - 1) != ">"))
        {
            return tmpHtml;
        }
        int indexS = tmpHtml.IndexOf(remove.Replace(">", ""));
        int indexE = -1;
        if (indexS > -1)
        {
            string tmpRight = tmpHtml.Substring(indexS, tmpHtml.Length - indexS);
            indexE = tmpRight.IndexOf(">");
            if (indexE > -1)
            {
                tmpHtml = tmpHtml.Substring(0, indexE) + tmpHtml.Substring(indexS + indexE + 1);
            }
            if (tmpHtml.IndexOf(remove.Replace(">", "")) > -1)
            {
                tmpHtml = RemoveStringHead(tmpHtml, remove);
            }

        }
        return tmpHtml;
    }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值