获取分类排名
// Downloads an Amazon product page and collects (category name, rank) pairs from its
// "Best Sellers Rank" section into sortRankingList. Two page layouts are handled:
// a list layout introduced by "Amazon Best Sellers Rank" and a table layout introduced
// by "Best Sellers Rank". Entries whose rank text cannot be parsed are skipped instead
// of aborting the whole scrape.
List<Tuple<string, int>> sortRankingList = new List<Tuple<string, int>>();
string asin = "";
string url = "https://www.amazon.com/gp/product/" + asin;
string strHTML = "";
// using-blocks release the client, stream and reader even when the download throws.
using (WebClient myWebClient = new WebClient())
using (Stream myStream = myWebClient.OpenRead(url))
using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
{
    strHTML = sr.ReadToEnd();
}

// Skeleton document wrapped around the extracted fragment before it is handed to
// HtmlAgilityPack; both layouts share the same head and tail.
string documentHead = "<!DOCTYPE html>" + System.Environment.NewLine
    + "<html lang='en-us' class='a-no-js' data-19ax5a9jf='dingo'>" + System.Environment.NewLine
    + "<head><title></title><meta charset='utf-8'/><meta http-equiv='x-dns-prefetch-control' content='on'><link rel='dns-prefetch' href='https://images-na.ssl-images-amazon.com'><link rel='dns-prefetch' href='https://m.media-amazon.com'><link rel='dns-prefetch' href='https://completion.amazon.com'><script type='text/javascript'>var ue_t0=ue_t0||+new Date();</script></head>" + System.Environment.NewLine
    + "<body>" + System.Environment.NewLine;
string documentTail = "</body></html>";

// Ordinal searches: these are markup markers, not linguistic text.
int listSectionStart = strHTML.IndexOf("Amazon Best Sellers Rank", System.StringComparison.Ordinal);
if (listSectionStart != -1)
{
    // List layout: the section runs up to the next closing </div>.
    int listSectionEnd = strHTML.IndexOf("</div>", listSectionStart, System.StringComparison.Ordinal);
    if (listSectionEnd != -1)
    {
        strHTML = strHTML.Substring(listSectionStart, listSectionEnd - listSectionStart);
        string newStrHtml = documentHead
            + "<ul><li>" + strHTML + "</ul>" + System.Environment.NewLine
            + documentTail;
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(newStrHtml);
        var nodes = doc.DocumentNode.SelectNodes(".//li");
        if (nodes != null)
        {
            for (int i = 0; i < nodes.Count; i++)
            {
                string sortName = "";
                string rankText = "";
                if (i == 0)
                {
                    // First item: the rank is the text between '#' and the first "in";
                    // the category comes from the first link ("See Top 100 in <category>").
                    string nodeText = nodes[i].InnerText;
                    int hashIndex = nodeText.IndexOf("#", System.StringComparison.Ordinal);
                    if (hashIndex != -1)
                    {
                        nodeText = nodeText.Substring(hashIndex);
                        int inIndex = nodeText.IndexOf("in", System.StringComparison.Ordinal);
                        if (inIndex != -1)
                        {
                            rankText = nodeText.Substring(0, inIndex);
                        }
                    }
                    var firstLink = nodes[i].SelectSingleNode(".//a[1]");
                    sortName = firstLink != null ? firstLink.InnerText : "";
                    sortName = sortName.Replace("See Top 100 in ", "");
                }
                else
                {
                    // Remaining items: category in the link, rank in the first span.
                    var categoryLink = nodes[i].SelectSingleNode(".//a");
                    sortName = categoryLink != null ? categoryLink.InnerText : "";
                    var rankSpan = nodes[i].SelectSingleNode(".//span[1]");
                    rankText = rankSpan != null ? rankSpan.InnerText : "";
                }

                int ranking;
                bool rankParsed = int.TryParse(
                    rankText.Replace("#", "").Replace(",", "").Trim(),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out ranking);
                if (rankParsed && string.IsNullOrEmpty(sortName) == false)
                {
                    sortRankingList.Add(new Tuple<string, int>(sortName, ranking));
                }
            }
        }
    }
}
else
{
    // Table layout: the section runs up to the next closing </tr>.
    int tableSectionStart = strHTML.IndexOf("Best Sellers Rank", System.StringComparison.Ordinal);
    int tableSectionEnd = tableSectionStart == -1
        ? -1
        : strHTML.IndexOf("</tr>", tableSectionStart, System.StringComparison.Ordinal);
    if (tableSectionEnd != -1)
    {
        strHTML = strHTML.Substring(tableSectionStart, tableSectionEnd - tableSectionStart);
        string newStrHtml = documentHead
            + "<table><tr><th>" + strHTML + "</tr></table>" + System.Environment.NewLine
            + documentTail;
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(newStrHtml);
        var nodes = doc.DocumentNode.SelectNodes(".//span/span");
        if (nodes != null)
        {
            for (int i = 0; i < nodes.Count; i++)
            {
                var categoryLink = nodes[i].SelectSingleNode(".//a");
                string sortName = categoryLink != null ? categoryLink.InnerText : "";
                sortName = sortName.Replace("See Top 100 in ", "");

                // The rank is whatever precedes the first "in" of the span text.
                string rankText = "";
                string spanText = nodes[i].InnerText;
                int inIndex = spanText.IndexOf("in", System.StringComparison.Ordinal);
                if (inIndex != -1)
                {
                    rankText = spanText.Substring(0, inIndex);
                }

                int ranking;
                bool rankParsed = int.TryParse(
                    rankText.Replace("#", "").Replace(",", "").Trim(),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out ranking);
                if (rankParsed && string.IsNullOrEmpty(sortName) == false)
                {
                    sortRankingList.Add(new Tuple<string, int>(sortName, ranking));
                }
            }
        }
    }
}
总评分、review数量
// Downloads an Amazon product page and reads the overall star rating (title of the
// span with id 'acrPopover') and the rating count (text of the span with id
// 'acrCustomerReviewText'). star and review stay 0 when the element is missing or
// its text cannot be parsed.
string asin = "";
string url = "https://www.amazon.com/gp/product/" + asin;
string strHTML = "";
// using-blocks release the client, stream and reader even when the download throws.
using (WebClient myWebClient = new WebClient())
using (Stream myStream = myWebClient.OpenRead(url))
using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
{
    strHTML = sr.ReadToEnd();
}
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(strHTML);
HtmlNode comment_star_node = doc.DocumentNode.SelectSingleNode("//span[@id='acrPopover']");
// GetAttributeValue falls back to "" instead of throwing when the title attribute is absent.
string comment_star_str = comment_star_node == null ? "" : comment_star_node.GetAttributeValue("title", "");
HtmlNode comment_review_node = doc.DocumentNode.SelectSingleNode("//span[@id='acrCustomerReviewText']");
string comment_review_str = comment_review_node == null ? "" : comment_review_node.InnerText;
// Extract the overall rating and the review count. Both are declared out here so the
// parsed values remain available to the code that follows.
double star = 0;
int review = 0;
if (string.IsNullOrEmpty(comment_star_str) == false)
{
    // Invariant culture: the rating uses '.' as decimal separator regardless of the
    // machine's regional settings.
    double.TryParse(
        comment_star_str.Replace("out of 5 stars", "").Trim(),
        System.Globalization.NumberStyles.Float,
        System.Globalization.CultureInfo.InvariantCulture,
        out star);
}
if (string.IsNullOrEmpty(comment_review_str) == false)
{
    // Keep only the digits so thousands separators ("1,234 ratings") and the singular
    // form ("1 rating") both parse. NOTE(review): assumes the count is the only number
    // in this text - confirm against the live page.
    string reviewDigits = System.Text.RegularExpressions.Regex.Replace(comment_review_str, "[^0-9]", "");
    int.TryParse(
        reviewDigits,
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture,
        out review);
}