相关步骤
1.根据url构建 HtmlDocument
2.利用xpath 查找想要的内容
如果查找内容唯一，用 SelectSingleNode 直接定位
如果是多个,需要循环遍历
代码如下
/// <summary>
/// Downloads the page at a hard-coded URL, parses it with HtmlAgilityPack and
/// prints its title, the text of the <c>div#js_content</c> element and the
/// <c>data-src</c> of the first image inside that element, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // The original literal started with a stray space, which leaked into the
    // printed URL and into the fallback description below.
    string url = "https://mp.weixin.qq.com/s/mWN67V3IOEUWk1sj3Kl1Xg";
    Console.WriteLine("待解析的url:{0}", url);

    // Direct casts instead of "as": a wrong type fails here with a clear
    // InvalidCastException rather than a later NullReferenceException.
    HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
    httpWebRequest.Method = "GET";

    using (HttpWebResponse response = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.Load(stream, Encoding.UTF8);

        // Title: use <title>, falling back to the og:title meta tag when
        // <title> is missing or empty. Either node may be absent.
        string title = "";
        HtmlNode titleNode = htmlDoc.DocumentNode.SelectSingleNode("//head/title");
        if (titleNode != null)
        {
            title = titleNode.InnerText.Replace("\n", "").Trim();
        }
        if (title.Length == 0)
        {
            HtmlNode ogTitleNode = htmlDoc.DocumentNode.SelectSingleNode("//meta[@property='og:title']");
            if (ogTitleNode != null)
            {
                title = ogTitleNode.GetAttributeValue("content", "").Trim();
            }
        }

        // Description: text of the first div whose id is "js_content".
        // An XPath predicate replaces the manual scan over every <div>, and
        // SelectSingleNode returns null (handled below) when nothing matches.
        // When the element is missing the URL is used as the description.
        string descStr = url;
        HtmlNode contentNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='js_content']");
        if (contentNode != null)
        {
            descStr = contentNode.InnerText.Replace("\n", "").Trim();
        }

        // Image: data-src of the first <img> under div#js_content; stays
        // empty when there is no such image.
        string imgStr = "";
        HtmlNode imgNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='js_content']//img");
        if (imgNode != null)
        {
            imgStr = imgNode.GetAttributeValue("data-src", "");
        }

        Console.WriteLine("title:{0},desc:{1},img:{2}", title, descStr, imgStr);
    }

    Console.ReadKey();
}