1. 首先,在 Visual Studio 的 NuGet 包管理器中为要使用的项目引用 HtmlAgilityPack 包。
2. 读取页面,并用 HtmlAgilityPack 解析出页面中的各项值(书名、章节名、正文、下一章地址)。
3. 最后循环抓取下一章,直到没有下一章地址或达到设置的章节上限为止。
GetUrlContent(100);// set the upper limit on the number of chapters to fetch
/// <summary>
/// Downloads chapter pages one after another, starting from the hard-coded first-chapter
/// address. Each page is decoded as GBK, parsed with HtmlAgilityPack and passed to
/// <c>GetHrefAndWrite</c>, whose return value becomes the next address to fetch.
/// </summary>
/// <param name="maxNum">Maximum number of chapters to download.</param>
private void GetUrlContent(int maxNum)
{
    strUrl = "https://www.XXXXXXX.org/0/3/3730.html"; // initial address: first chapter
    // WebProxy proxyObject = new WebProxy(IP, port); // proxy, not used for now

    // Stop when there is no next address or the chapter limit is reached.
    // The counter is zero-based (GetHrefAndWrite treats 0 as the first chapter), so it is
    // compared with '<': the previous 'num > maxNum' test let maxNum + 1 chapters through.
    while (!string.IsNullOrEmpty(strUrl) && num < maxNum)
    {
        // Request the current chapter page.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
        // request.Proxy = proxyObject;
        request.Timeout = 10000; // milliseconds

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GBK")))
        {
            doc.Load(reader);
        }

        // Write this chapter and pick up the address of the next one.
        strUrl = GetHrefAndWrite(doc, num);
        num++;
    }
}
3
/// <summary>
/// Appends the chapter contained in <paramref name="doc"/> to the output text file and
/// returns the address of the next chapter.
/// </summary>
/// <param name="doc">Parsed HTML of the current chapter page.</param>
/// <param name="num">Zero-based chapter index; 0 (re)creates the output file, named after the book title.</param>
/// <returns>
/// Absolute address of the next chapter, or an empty string when the page has no
/// "下一章" link or does not contain the expected title/heading/content nodes
/// (an empty string ends the crawl in the caller).
/// </returns>
private string GetHrefAndWrite(HtmlDocument doc, int num)
{
    // First chapter: create the output file.
    if (num == 0)
    {
        var bookTitleNode = doc.DocumentNode.SelectSingleNode("//span[@class='articletitle']//a"); // book title
        if (bookTitleNode == null)
        {
            return "";
        }

        // The title comes from the remote page, so strip characters that are not
        // allowed in a file name before building the path.
        var fileName = bookTitleNode.InnerText.Trim();
        foreach (char invalid in Path.GetInvalidFileNameChars())
        {
            fileName = fileName.Replace(invalid, '_');
        }

        txtFile = Server.MapPath("~/UpFiles/" + fileName + ".txt");
        // File.Create overwrites an existing file, so no separate delete is needed.
        File.Create(txtFile).Close();
    }

    var chapterTitleNode = doc.DocumentNode.SelectSingleNode("//h3"); // chapter name
    var contentNode = doc.DocumentNode.SelectSingleNode("//p[@class='articlecontent']");
    if (chapterTitleNode == null || contentNode == null)
    {
        // Not a chapter page (e.g. the page after the last chapter): nothing to write or follow.
        return "";
    }

    // Write the chapter body.
    if (!string.IsNullOrEmpty(txtFile))
    {
        using (StreamWriter sw = File.AppendText(txtFile))
        {
            sw.WriteLine(chapterTitleNode.InnerText); // chapter name
            sw.WriteLine();                           // blank line
            // InnerText leaves HTML entities undecoded, so normalise both the raw
            // "&nbsp;" entity and an already-decoded non-breaking space to a plain space.
            var txtContent = contentNode.InnerText
                .Replace("<br />", "")
                .Replace("<br/>", "")
                .Replace("&nbsp;", " ")
                .Replace('\u00A0', ' ');
            sw.WriteLine(txtContent);
        }
        Thread.Sleep(2000); // pause between chapters
    }

    // Finally, look up the address of the next chapter.
    var nextPageUrl = "";
    var navLinks = doc.DocumentNode.SelectNodes("//p[@class='nrset']//a");
    if (navLinks == null)
    {
        // SelectNodes yields null, not an empty collection, when nothing matches.
        return nextPageUrl;
    }

    foreach (var link in navLinks)
    {
        if (!link.InnerText.Contains("下一章"))
        {
            continue;
        }

        var href = link.Attributes["href"];
        if (href != null && !string.IsNullOrEmpty(href.Value))
        {
            // As before, the last matching link wins.
            nextPageUrl = "https://www.XXXXX.org" + href.Value;
        }
    }
    return nextPageUrl;
}