using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConsoleApplication1
{
/// <summary>
/// A named menu entry that can hold a list of sub-menu entries and a list of
/// category entries, each of which is itself a <see cref="CMenu"/>.
/// </summary>
public class CMenu
{
    /// <summary>Text of this entry.</summary>
    public string Name { get; set; }

    /// <summary>Nested entries; stays <c>null</c> until a caller assigns a list.</summary>
    public List<CMenu> SubMenu { get; set; }

    /// <summary>Category entries; stays <c>null</c> until a caller assigns a list.</summary>
    public List<CMenu> CategoriesMenu { get; set; }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using HtmlAgilityPack;
using Newtonsoft.Json;
namespace ConsoleApplication1
{
class Program
{
/// <summary>
/// Program entry point. Runs the 58 extraction only; the other two extractors
/// are kept below, commented out, so they can be switched on by hand.
/// </summary>
/// <param name="args">Command-line arguments; not read.</param>
static void Main(string[] args)
{
    // Disabled extractors:
    //GetMeiTuan();
    //GetTmall();

    Get58();
}
/// <summary>
/// Builds a new HtmlAgilityPack document from an HTML string.
/// </summary>
/// <param name="html">Markup handed to <c>LoadHtml</c>.</param>
/// <returns>The freshly created document after <c>LoadHtml</c> has run.</returns>
static HtmlAgilityPack.HtmlDocument GetDoc(string html)
{
    var parsed = new HtmlAgilityPack.HtmlDocument();
    parsed.LoadHtml(html);
    return parsed;
}
/// <summary>
/// Extracts the menu tree from the saved 58 page and writes it out as JSON.
/// </summary>
/// <remarks>
/// Reads <c>58\one.txt</c> under the application base directory, creates one
/// <see cref="CMenu"/> per <c>div</c> whose class is <c>board</c> (named after its
/// <c>h2</c> with class <c>margT0</c>) with one <see cref="CMenu.SubMenu"/> entry per
/// link inside that board, and writes the serialized list to <c>58Result.txt</c> in
/// the same directory. When no board is found the list stays <c>null</c> and is
/// serialized as such.
/// </remarks>
static void Get58()
{
    string baseDir = AppDomain.CurrentDomain.BaseDirectory;
    // Path.Combine instead of a hard-coded "\\" so the separator is not Windows-specific.
    string pageHtml = File.ReadAllText(Path.Combine(baseDir, Path.Combine("58", "one.txt")));
    HtmlAgilityPack.HtmlDocument page = GetDoc(pageHtml);
    HtmlNodeCollection boards = page.DocumentNode.SelectNodes("//div[@class=\"board\"]");

    List<CMenu> menus = null;
    if (boards != null && boards.Count > 0)
    {
        menus = new List<CMenu>();
        foreach (HtmlNode board in boards)
        {
            CMenu menu = new CMenu();

            // The leading "." scopes the query to this board's descendants, which
            // replaces serializing the board and reparsing it as its own document.
            HtmlNode heading = board.SelectSingleNode(".//h2[@class=\"margT0\"]");

            // A board without the expected heading previously caused a
            // NullReferenceException; it is now kept with an empty name.
            menu.Name = heading != null ? heading.InnerText.Trim() : string.Empty;

            HtmlNodeCollection links = board.SelectNodes(".//a");
            if (links != null && links.Count > 0)
            {
                menu.SubMenu = new List<CMenu>();
                foreach (HtmlNode link in links)
                {
                    menu.SubMenu.Add(new CMenu { Name = link.InnerText.Trim() });
                }
            }

            menus.Add(menu);
        }
    }

    File.WriteAllText(Path.Combine(baseDir, "58Result.txt"), JsonConvert.SerializeObject(menus));
}
/// <summary>
/// Extracts the menu tree from the two saved MeiTuan fragments and writes it out as JSON.
/// </summary>
/// <remarks>
/// Reads <c>meituan\one.txt</c> and <c>meituan\two.txt</c> under the application base
/// directory. Every <c>li</c> in the first file becomes a <see cref="CMenu"/> whose
/// <see cref="CMenu.CategoriesMenu"/> holds the links inside that <c>li</c>. The
/// <see cref="CMenu.SubMenu"/> of the i-th <c>li</c> is filled from the links inside the
/// <c>detail-content</c> div of the i-th top-level node of the second file. The result
/// is written to <c>MeiTuanResult.txt</c>; when no <c>li</c> is found the list stays
/// <c>null</c> and is serialized as such.
/// </remarks>
static void GetMeiTuan()
{
    string baseDir = AppDomain.CurrentDomain.BaseDirectory;
    string navHtml = File.ReadAllText(Path.Combine(baseDir, Path.Combine("meituan", "one.txt")));
    string detailHtml = File.ReadAllText(Path.Combine(baseDir, Path.Combine("meituan", "two.txt")));
    HtmlAgilityPack.HtmlDocument navDoc = GetDoc(navHtml);
    HtmlAgilityPack.HtmlDocument detailDoc = GetDoc(detailHtml);

    // Selecting by class ("//li[@class='j_MenuNav nav-item']") did not work,
    // so every <li> is taken instead.
    HtmlNodeCollection items = navDoc.DocumentNode.SelectNodes("//li");

    // Selecting "//div[@class=\"category-nav-detail\"]" found one node too few
    // (the active one), so the top-level child nodes are used instead.
    // Hoisted out of the loop because it does not depend on the current <li>.
    // NOTE(review): ChildNodes presumably also contains text/comment nodes that sit
    // between the elements, so positions may not line up with the <li> index —
    // confirm against two.txt.
    HtmlNodeCollection panels = detailDoc.DocumentNode.ChildNodes;

    List<CMenu> menus = null;
    if (items != null && items.Count > 0)
    {
        menus = new List<CMenu>();
        int index = 0;
        foreach (HtmlNode item in items)
        {
            CMenu menu = new CMenu();
            menu.Name = item.InnerText.Trim();

            // Attached categories: the links inside this <li>. The leading "."
            // scopes the query to the <li> without reparsing its markup.
            HtmlNodeCollection categoryLinks = item.SelectNodes(".//a");
            if (categoryLinks != null && categoryLinks.Count > 0)
            {
                menu.CategoriesMenu = new List<CMenu>();
                foreach (HtmlNode link in categoryLinks)
                {
                    menu.CategoriesMenu.Add(new CMenu { Name = link.InnerText.Trim() });
                }
            }

            if (panels != null && panels.Count > 0)
            {
                menu.SubMenu = new List<CMenu>();

                // More <li> entries than top-level nodes previously threw on the
                // indexer; such entries now simply keep an empty sub-menu.
                if (index < panels.Count)
                {
                    // The panel is reparsed as its own document so that "//" is
                    // limited to it and the panel node itself can be the match.
                    HtmlAgilityPack.HtmlDocument panelDoc = GetDoc(panels[index].OuterHtml);
                    HtmlNode detail = panelDoc.DocumentNode.SelectSingleNode("//div[@class=\"detail-content\"]");
                    if (detail != null)
                    {
                        // SelectNodes returns null when nothing matches; the
                        // original iterated the result without checking.
                        HtmlNodeCollection detailLinks = detail.SelectNodes(".//a");
                        if (detailLinks != null)
                        {
                            foreach (HtmlNode link in detailLinks)
                            {
                                menu.SubMenu.Add(new CMenu { Name = link.InnerText.Trim() });
                            }
                        }
                    }
                }
            }

            menus.Add(menu);
            index++;
        }
    }

    File.WriteAllText(Path.Combine(baseDir, "MeiTuanResult.txt"), JsonConvert.SerializeObject(menus));
}
/// <summary>
/// Extracts the three-level menu tree from the two saved Tmall fragments and writes it out as JSON.
/// </summary>
/// <remarks>
/// Reads <c>one.txt</c> and <c>two.txt</c> from the application base directory. Every
/// <c>li</c> in the first file becomes a <see cref="CMenu"/> whose
/// <see cref="CMenu.CategoriesMenu"/> holds the links inside that <c>li</c>. For the
/// i-th <c>li</c>, the div with class <c>pannel-i</c> in the second file is searched for
/// <c>hot-word-line</c> divs; each one yields a sub-menu named after its first nested
/// div, with the links of its third nested div as children. Entries whose panel or
/// nested divs are missing are skipped instead of aborting the run. The result is
/// written to <c>result.txt</c>; when no <c>li</c> is found the list stays <c>null</c>
/// and is serialized as such.
/// </remarks>
static void GetTmall()
{
    string baseDir = AppDomain.CurrentDomain.BaseDirectory;
    string navHtml = File.ReadAllText(Path.Combine(baseDir, "one.txt"));
    string panelsHtml = File.ReadAllText(Path.Combine(baseDir, "two.txt"));
    HtmlAgilityPack.HtmlDocument navDoc = GetDoc(navHtml);
    HtmlAgilityPack.HtmlDocument panelsDoc = GetDoc(panelsHtml);

    // Selecting by class ("//li[@class='j_MenuNav nav-item']") did not work,
    // so every <li> is taken instead.
    HtmlNodeCollection items = navDoc.DocumentNode.SelectNodes("//li");

    List<CMenu> menus = null;
    if (items != null && items.Count > 0)
    {
        menus = new List<CMenu>();
        int index = 0;
        foreach (HtmlNode item in items)
        {
            CMenu menu = new CMenu();
            menu.Name = item.InnerText.Trim();

            // Attached categories: the links inside this <li>. The leading "."
            // scopes the query to the <li> without reparsing its markup.
            HtmlNodeCollection categoryLinks = item.SelectNodes(".//a");
            if (categoryLinks != null && categoryLinks.Count > 0)
            {
                menu.CategoriesMenu = new List<CMenu>();
                foreach (HtmlNode link in categoryLinks)
                {
                    menu.CategoriesMenu.Add(new CMenu { Name = link.InnerText.Trim() });
                }
            }

            // The panel for this entry is looked up by position ("pannel-0", "pannel-1", ...).
            HtmlNode panel = panelsDoc.DocumentNode.SelectSingleNode("//div[@class=\"pannel-" + index.ToString() + "\"]");

            // A missing panel previously caused a NullReferenceException; the
            // entry is now kept without a sub-menu.
            if (panel != null)
            {
                // Each subtree below is reparsed as its own document on purpose:
                // "//" is evaluated from the document root, and these queries rely
                // on the subtree's own root div being part of what can match.
                HtmlNodeCollection sections = GetDoc(panel.OuterHtml).DocumentNode.SelectNodes("//div//div//div");
                if (sections != null && sections.Count > 0)
                {
                    menu.SubMenu = new List<CMenu>();
                    foreach (HtmlNode section in sections)
                    {
                        // NOTE(review): "//div//div//div" can match divs at several
                        // nesting depths, so a hot-word-line could presumably be
                        // reached through more than one section — confirm against two.txt.
                        HtmlNodeCollection hotWordLines = GetDoc(section.OuterHtml).DocumentNode.SelectNodes("//div[@class=\"hot-word-line\"]");
                        if (hotWordLines == null)
                        {
                            continue;
                        }

                        foreach (HtmlNode line in hotWordLines)
                        {
                            HtmlNodeCollection parts = GetDoc(line.OuterHtml).DocumentNode.SelectNodes("//div//div");

                            // parts[0] is read as the title and parts[2] as the link
                            // container; lines with fewer nested divs used to crash.
                            if (parts == null || parts.Count < 3)
                            {
                                continue;
                            }

                            CMenu group = new CMenu();
                            group.Name = parts[0].InnerText.Trim();
                            group.SubMenu = new List<CMenu>();

                            // SelectNodes returns null when nothing matches; the
                            // original iterated the result without checking.
                            HtmlNodeCollection words = parts[2].SelectNodes(".//a");
                            if (words != null)
                            {
                                foreach (HtmlNode word in words)
                                {
                                    group.SubMenu.Add(new CMenu { Name = word.InnerText.Trim() });
                                }
                            }

                            menu.SubMenu.Add(group);
                        }
                    }
                }
            }

            menus.Add(menu);
            index++;
        }
    }

    File.WriteAllText(Path.Combine(baseDir, "result.txt"), JsonConvert.SerializeObject(menus));
}
}
}