【C#】知乎网站数学类文章通过html转义到CSDN

// Usage: ZhihuArticle zhihuArticle = new ZhihuArticle(file);
using MariGold.HtmlParser;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace www.zhihu.com_equation_tex
{
    /// <summary>
    /// Reads a saved Zhihu article (HTML), extracts its text and the TeX stored in the
    /// <c>data-formula</c> attribute of <c>img</c> elements, and writes the result line by
    /// line to the debug output (<see cref="System.Diagnostics.Debug"/>).
    /// </summary>
    /// <remarks>
    /// The whole conversion runs inside the constructor; the instance keeps no state other
    /// than <see cref="file_name"/>.
    /// </remarks>
    public class ZhihuArticle
    {
        /// <summary>
        /// Raw <c>data-formula</c> values shorter than this are appended to the current
        /// paragraph; longer ones are written on a line of their own.
        /// </summary>
        private const int InlineFormulaMaxLength = 10;

        /// <summary>Substrings that make a line a level-1 heading ("# ").</summary>
        private static readonly string[] SectionMarkers =
        {
            "一、", "二、", "三、", "四、", "五、", "六、",
            "七、", "八、", "九、", "十、", "十一、"
        };

        /// <summary>Substrings that make a line a level-2 heading ("## ").</summary>
        private static readonly string[] SubsectionMarkers =
        {
            "1.", "2.", "3.", "4.", "5.", "6.",
            "7.", "8.", "9.", "10.", "11."
        };

        /// <summary>
        /// Ordered (search, replacement) pairs applied to every formula before it is emitted.
        /// </summary>
        private static readonly string[][] FormulaReplacements =
        {
            // The original statement replaced "&" with "&" (a no-op), which looks like an
            // "&amp;" literal that was entity-decoded when the code was published.
            // NOTE(review): assumes the parser may leave "&amp;" undecoded in attribute
            // values; when it does not, this pair simply never matches.
            new[] { "&amp;", "&" },
            // NOTE(review): presumably the target renderer only accepts "aligned" - confirm.
            new[] { @"\begin{align}", @"\begin{aligned}" },
            new[] { @"\begin{array}", @"\begin{aligned}" },
            new[] { @"\end{align}", @"\end{aligned}" },
            new[] { @"\end{array}", @"\end{aligned}" },
            new[] { @"\begin{align*}", @"\begin{aligned}" },
            new[] { @"\end{align*}", @"\end{aligned}" },
            // Switch the colour back to black right after a single red symbol.
            new[] { @"\color{red}x", @"\color{red}x\color{black}" },
            new[] { @"\color{red}y", @"\color{red}y\color{black}" },
            new[] { @"\color{red}z", @"\color{red}z\color{black}" },
            new[] { @"\color{red}或", @"\color{red}或\color{black}" }
        };

        /// <summary>Path that was passed to the constructor.</summary>
        public string file_name { get; set; }

        /// <summary>
        /// Parses <paramref name="filename"/> and writes the converted article to the debug
        /// output. Does nothing (besides recording the path) when the file does not exist.
        /// </summary>
        /// <param name="filename">Path of the saved HTML file.</param>
        public ZhihuArticle(string filename)
        {
            file_name = filename;
            if (!System.IO.File.Exists(filename)) return;

            string html = System.IO.File.ReadAllText(filename);
            MariGold.HtmlParser.HtmlTextParser htmlTextParser = new MariGold.HtmlParser.HtmlTextParser(html);
            htmlTextParser.Parse();

            string paragraph = "";
            // Visit every top-level sibling, including the last one, and tolerate a parser
            // that produced no node at all.
            for (var node = htmlTextParser.Current; node != null; node = node.Next)
            {
                int layer = 0;
                forview(node, ref layer, ref paragraph);
            }

            // Text collected after the last <p> would otherwise be lost.
            FlushParagraph(ref paragraph);
        }

        /// <summary>
        /// Recursively walks the children of <paramref name="nodeParent"/> (depth first),
        /// accumulating text into <paramref name="paragraph"/> and emitting it whenever a
        /// <c>p</c> element or a long formula is reached.
        /// </summary>
        /// <param name="nodeParent">Node whose children are visited; <c>null</c> is ignored.</param>
        /// <param name="layer">
        /// Current recursion depth; incremented on entry and restored on exit.
        /// </param>
        /// <param name="paragraph">Text collected so far for the paragraph being built.</param>
        public void forview(IHtmlNode nodeParent, ref int layer, ref string paragraph)
        {
            if (nodeParent == null) return;

            layer++;
            if (nodeParent.HasChildren)
            {
                foreach (var node in nodeParent.Children)
                {
                    // Children first, so the text inside a <p> is collected before the
                    // paragraph is flushed below.
                    forview(node, ref layer, ref paragraph);

                    if (node.Tag == "p")
                    {
                        FlushParagraph(ref paragraph);
                    }

                    if (node.Tag == "img")
                    {
                        foreach (var attribute in node.Attributes)
                        {
                            if (attribute.Key == "data-formula")
                            {
                                AppendFormula(attribute.Value, ref paragraph);
                            }
                        }
                    }

                    if (node.IsText)
                    {
                        paragraph += node.InnerHtml;
                    }
                }
            }
            layer--;
        }

        /// <summary>
        /// Writes <paramref name="paragraph"/> one line at a time, splitting on CRLF.
        /// </summary>
        /// <param name="paragraph">Text to write; <c>null</c> is ignored.</param>
        public void WriteLines(string paragraph)
        {
            if (paragraph == null) return;

            // Split returns the whole string as a single element when no CRLF is present,
            // so no separate branch is needed for single-line text.
            foreach (var line in paragraph.Split(new[] { "\r\n" }, StringSplitOptions.None))
            {
                WriteLineFor(line);
            }
        }

        /// <summary>
        /// Writes a single line to the debug output, adding heading prefixes and formula
        /// delimiters. Empty or <c>null</c> input writes nothing.
        /// </summary>
        /// <param name="paragraph">The line to write.</param>
        public void WriteLineFor(string paragraph)
        {
            if (string.IsNullOrEmpty(paragraph)) return;

            // NOTE(review): markers are matched anywhere in the line, not only at its start,
            // so ordinary sentences containing e.g. "1." also become headings.
            if (ContainsAny(paragraph, SectionMarkers))
            {
                paragraph = $"# {paragraph}";
            }

            if (paragraph.Contains("$$"))
            {
                // A line that contains any formula is emitted as one formula block: the
                // inner delimiters are stripped and a single pair wraps the whole line.
                string stripped = paragraph.Replace("$$", "");
                System.Diagnostics.Debug.WriteLine($"$${stripped} $$");
                return;
            }

            if (ContainsAny(paragraph, SubsectionMarkers))
            {
                paragraph = $"## {paragraph}";
            }
            System.Diagnostics.Debug.WriteLine(paragraph);
        }

        /// <summary>Writes the pending paragraph, if any, and resets it to empty.</summary>
        private void FlushParagraph(ref string paragraph)
        {
            if (!string.IsNullOrEmpty(paragraph))
            {
                WriteLines(paragraph);
            }
            paragraph = "";
        }

        /// <summary>
        /// Emits one formula: short ones are appended to the paragraph, long ones are
        /// written on their own line after the pending paragraph is flushed.
        /// </summary>
        private void AppendFormula(string rawFormula, ref string paragraph)
        {
            if (rawFormula == null) return;

            string formula = NormalizeFormula(rawFormula);

            // The threshold is applied to the raw attribute value, not the normalized one.
            if (rawFormula.Length < InlineFormulaMaxLength)
            {
                paragraph += $"$$ {formula} $$";
                return;
            }

            FlushParagraph(ref paragraph);

            // A formula ending in a TeX line break (two backslashes) gets a space before
            // the closing delimiter. EndsWith is safe for strings shorter than two chars.
            string closing = formula.EndsWith(@"\\", StringComparison.Ordinal) ? " $$" : "$$";
            System.Diagnostics.Debug.WriteLine($"$${formula}{closing}");
        }

        /// <summary>Applies <see cref="FormulaReplacements"/> in order.</summary>
        private static string NormalizeFormula(string formula)
        {
            foreach (var pair in FormulaReplacements)
            {
                formula = formula.Replace(pair[0], pair[1]);
            }
            return formula;
        }

        /// <summary>Returns true when <paramref name="text"/> contains any of the markers.</summary>
        private static bool ContainsAny(string text, string[] markers)
        {
            return markers.Any(marker => text.Contains(marker));
        }
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值