1,程序使用了HtmlAgilityPack组件,在百度上爬取前3页中某一网站的关键词排名,
using System;
using System.Net;using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Web;
using HtmlAgilityPack;
namespace 关键词排名.rpt.scan
{
/// <summary>
/// Looks up the position of a site in Baidu's search results for a keyword,
/// scanning only the first three result pages (positions 1-30).
/// </summary>
public class Baidu
{
    /// <summary>
    /// Search URL template: {0} is the URL-encoded keyword, {1} is the result offset (pn).
    /// </summary>
    private const string SearchUrlFormat = "http://www.baidu.com/s?wd={0}&ie=utf-8&pn={1}";

    /// <summary>
    /// XPath selecting the result cells under the "content_left" container.
    /// </summary>
    private const string ResultXPath = "//div[@id='content_left']/table/tr/td[@class='c-default']";

    /// <summary>
    /// Host matched when the caller supplies no domain; this is the placeholder
    /// the method used to match unconditionally, kept for backward compatibility.
    /// </summary>
    private const string DefaultDomain = "www.abc.com";

    /// <summary>Number of result pages to scan.</summary>
    private const int PageCount = 3;

    /// <summary>Offset step between consecutive result pages.</summary>
    private const int ResultsPerPage = 10;

    /// <summary>Maximum number of download attempts per result page.</summary>
    private const int MaxDownloadAttempts = 3;

    /// <summary>
    /// Checks the ranking of a site for a keyword and returns the outcome as a message.
    /// </summary>
    /// <param name="domains">
    /// Site to look for. It is matched as a case-insensitive substring of each result's
    /// displayed URL element. When null or blank, <c>www.abc.com</c> is used.
    /// </param>
    /// <param name="keyword">Search keyword; runs of whitespace are collapsed.</param>
    /// <returns>
    /// A success message containing the ranking, a "not in the top 30" message,
    /// a download-failure message, or the message of any exception raised while
    /// fetching or parsing a page.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="keyword"/> is null.</exception>
    public string CheckRanking(string domains, string keyword)
    {
        if (keyword == null)
        {
            throw new ArgumentNullException("keyword");
        }

        var target = (domains ?? string.Empty).Trim();
        if (target.Length == 0)
        {
            target = DefaultDomain;
        }

        // The keyword goes into a query string, so it must be URL-encoded (not HTML-encoded).
        // UrlEncode emits UTF-8 (matching ie=utf-8) and turns each space into '+'.
        var encodedKeyword = HttpUtility.UrlEncode(Regex.Replace(keyword.Trim(), "\\s+", " "));

        // Running position across all scanned pages; every result cell counts.
        var ranking = 0;

        using (var client = new WebClient())
        {
            client.Encoding = Encoding.UTF8;

            for (var page = 0; page < PageCount; page++)
            {
                var searchUrl = string.Format(SearchUrlFormat, encodedKeyword, page * ResultsPerPage);
                try
                {
                    var html = DownloadWithRetry(client, searchUrl);
                    if (string.IsNullOrEmpty(html))
                    {
                        // Skipping the page would silently shift every later position,
                        // so report the failure instead of a misleading ranking.
                        return "获取搜索结果失败,请稍后重试";
                    }

                    var htmlDoc = new HtmlDocument();
                    htmlDoc.LoadHtml(html);

                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    var nodes = htmlDoc.DocumentNode.SelectNodes(ResultXPath);
                    if (nodes == null)
                    {
                        continue;
                    }

                    foreach (var item in nodes)
                    {
                        ranking++;
                        if (string.IsNullOrEmpty(item.InnerHtml))
                        {
                            continue;
                        }

                        var titleLink = item.SelectSingleNode("h3[@class='t']/a");
                        var displayUrl = item.SelectSingleNode("div/span[@class='g']");
                        if (titleLink == null || displayUrl == null)
                        {
                            continue;
                        }

                        if (displayUrl.InnerHtml.IndexOf(target, StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            return "匹配关键词成功,关键词排名为:" + ranking;
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Callers receive failures as the returned message rather than as exceptions.
                    return ex.Message;
                }
            }
        }

        return "匹配失败,关键词的排名在30之外";
    }

    /// <summary>
    /// Downloads a page, retrying on network errors or an empty body.
    /// </summary>
    /// <param name="client">Client used for the request.</param>
    /// <param name="url">Address to download.</param>
    /// <returns>The page content, or null when every attempt returned an empty body.</returns>
    /// <exception cref="WebException">The final attempt failed with a network error.</exception>
    private static string DownloadWithRetry(WebClient client, string url)
    {
        for (var attempt = 1; attempt <= MaxDownloadAttempts; attempt++)
        {
            try
            {
                var html = client.DownloadString(url);
                if (!string.IsNullOrEmpty(html))
                {
                    return html;
                }
            }
            catch (WebException)
            {
                // Rethrow only once the attempts are used up, preserving the stack trace.
                if (attempt == MaxDownloadAttempts)
                {
                    throw;
                }
            }
        }

        return null;
    }
}
}